chiark / gitweb /
064445d17e00f30887240c1b159af3d0fae262df
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85
86 static char *arg_default_unit = NULL;
87 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
88
89 static bool arg_dump_core = true;
90 static bool arg_crash_shell = false;
91 static int arg_crash_chvt = -1;
92 static bool arg_confirm_spawn = false;
93 static bool arg_show_status = true;
94 static bool arg_switched_root = false;
95 static char ***arg_join_controllers = NULL;
96 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
97 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
98 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
99 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
100 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
101 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
102 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
103 static usec_t arg_runtime_watchdog = 0;
104 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
105 static char **arg_default_environment = NULL;
106 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
107 static uint64_t arg_capability_bounding_set_drop = 0;
108 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
109
110 static FILE* serialization = NULL;
111
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD before forking the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
114
115 noreturn static void crash(int sig) {
116
117         if (getpid() != 1)
118                 /* Pass this on immediately, if this is not PID 1 */
119                 raise(sig);
120         else if (!arg_dump_core)
121                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
122         else {
123                 struct sigaction sa = {
124                         .sa_handler = nop_handler,
125                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
126                 };
127                 pid_t pid;
128
129                 /* We want to wait for the core process, hence let's enable SIGCHLD */
130                 sigaction(SIGCHLD, &sa, NULL);
131
132                 pid = fork();
133                 if (pid < 0)
134                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
135
136                 else if (pid == 0) {
137                         struct rlimit rl = {};
138
139                         /* Enable default signal handler for core dump */
140                         zero(sa);
141                         sa.sa_handler = SIG_DFL;
142                         sigaction(sig, &sa, NULL);
143
144                         /* Don't limit the core dump size */
145                         rl.rlim_cur = RLIM_INFINITY;
146                         rl.rlim_max = RLIM_INFINITY;
147                         setrlimit(RLIMIT_CORE, &rl);
148
149                         /* Just to be sure... */
150                         chdir("/");
151
152                         /* Raise the signal again */
153                         raise(sig);
154
155                         assert_not_reached("We shouldn't be here...");
156                         _exit(1);
157
158                 } else {
159                         siginfo_t status;
160                         int r;
161
162                         /* Order things nicely. */
163                         r = wait_for_terminate(pid, &status);
164                         if (r < 0)
165                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
166                         else if (status.si_code != CLD_DUMPED)
167                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
168                         else
169                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
170                 }
171         }
172
173         if (arg_crash_chvt)
174                 chvt(arg_crash_chvt);
175
176         if (arg_crash_shell) {
177                 struct sigaction sa = {
178                         .sa_handler = SIG_IGN,
179                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
180                 };
181                 pid_t pid;
182
183                 log_info("Executing crash shell in 10s...");
184                 sleep(10);
185
186                 /* Let the kernel reap children for us */
187                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
188
189                 pid = fork();
190                 if (pid < 0)
191                         log_error("Failed to fork off crash shell: %m");
192                 else if (pid == 0) {
193                         make_console_stdio();
194                         execl("/bin/sh", "/bin/sh", NULL);
195
196                         log_error("execl() failed: %m");
197                         _exit(1);
198                 }
199
200                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
201         }
202
203         log_info("Freezing execution.");
204         freeze();
205 }
206
207 static void install_crash_handler(void) {
208         struct sigaction sa = {
209                 .sa_handler = crash,
210                 .sa_flags = SA_NODEFER,
211         };
212
213         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
214 }
215
216 static int console_setup(bool do_reset) {
217         int tty_fd, r;
218
219         /* If we are init, we connect stdin/stdout/stderr to /dev/null
220          * and make sure we don't have a controlling tty. */
221
222         release_terminal();
223
224         if (!do_reset)
225                 return 0;
226
227         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
228         if (tty_fd < 0) {
229                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
230                 return -tty_fd;
231         }
232
233         /* We don't want to force text mode.
234          * plymouth may be showing pictures already from initrd. */
235         r = reset_terminal_fd(tty_fd, false);
236         if (r < 0)
237                 log_error("Failed to reset /dev/console: %s", strerror(-r));
238
239         close_nointr_nofail(tty_fd);
240         return r;
241 }
242
243 static int set_default_unit(const char *u) {
244         char *c;
245
246         assert(u);
247
248         c = strdup(u);
249         if (!c)
250                 return -ENOMEM;
251
252         free(arg_default_unit);
253         arg_default_unit = c;
254
255         return 0;
256 }
257
258 static int parse_proc_cmdline_word(const char *word) {
259
260         static const char * const rlmap[] = {
261                 "emergency", SPECIAL_EMERGENCY_TARGET,
262                 "-b",        SPECIAL_EMERGENCY_TARGET,
263                 "single",    SPECIAL_RESCUE_TARGET,
264                 "-s",        SPECIAL_RESCUE_TARGET,
265                 "s",         SPECIAL_RESCUE_TARGET,
266                 "S",         SPECIAL_RESCUE_TARGET,
267                 "1",         SPECIAL_RESCUE_TARGET,
268                 "2",         SPECIAL_RUNLEVEL2_TARGET,
269                 "3",         SPECIAL_RUNLEVEL3_TARGET,
270                 "4",         SPECIAL_RUNLEVEL4_TARGET,
271                 "5",         SPECIAL_RUNLEVEL5_TARGET,
272         };
273
274         assert(word);
275
276         if (startswith(word, "systemd.unit=")) {
277
278                 if (!in_initrd())
279                         return set_default_unit(word + 13);
280
281         } else if (startswith(word, "rd.systemd.unit=")) {
282
283                 if (in_initrd())
284                         return set_default_unit(word + 16);
285
286         } else if (startswith(word, "systemd.log_target=")) {
287
288                 if (log_set_target_from_string(word + 19) < 0)
289                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
290
291         } else if (startswith(word, "systemd.log_level=")) {
292
293                 if (log_set_max_level_from_string(word + 18) < 0)
294                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
295
296         } else if (startswith(word, "systemd.log_color=")) {
297
298                 if (log_show_color_from_string(word + 18) < 0)
299                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
300
301         } else if (startswith(word, "systemd.log_location=")) {
302
303                 if (log_show_location_from_string(word + 21) < 0)
304                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
305
306         } else if (startswith(word, "systemd.dump_core=")) {
307                 int r;
308
309                 if ((r = parse_boolean(word + 18)) < 0)
310                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
311                 else
312                         arg_dump_core = r;
313
314         } else if (startswith(word, "systemd.crash_shell=")) {
315                 int r;
316
317                 if ((r = parse_boolean(word + 20)) < 0)
318                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
319                 else
320                         arg_crash_shell = r;
321
322         } else if (startswith(word, "systemd.confirm_spawn=")) {
323                 int r;
324
325                 if ((r = parse_boolean(word + 22)) < 0)
326                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
327                 else
328                         arg_confirm_spawn = r;
329
330         } else if (startswith(word, "systemd.crash_chvt=")) {
331                 int k;
332
333                 if (safe_atoi(word + 19, &k) < 0)
334                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
335                 else
336                         arg_crash_chvt = k;
337
338         } else if (startswith(word, "systemd.show_status=")) {
339                 int r;
340
341                 if ((r = parse_boolean(word + 20)) < 0)
342                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
343                 else
344                         arg_show_status = r;
345         } else if (startswith(word, "systemd.default_standard_output=")) {
346                 int r;
347
348                 if ((r = exec_output_from_string(word + 32)) < 0)
349                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
350                 else
351                         arg_default_std_output = r;
352         } else if (startswith(word, "systemd.default_standard_error=")) {
353                 int r;
354
355                 if ((r = exec_output_from_string(word + 31)) < 0)
356                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
357                 else
358                         arg_default_std_error = r;
359         } else if (startswith(word, "systemd.setenv=")) {
360                 _cleanup_free_ char *cenv = NULL;
361
362                 cenv = strdup(word + 15);
363                 if (!cenv)
364                         return -ENOMEM;
365
366                 if (env_assignment_is_valid(cenv)) {
367                         char **env;
368
369                         env = strv_env_set(arg_default_environment, cenv);
370                         if (env)
371                                 arg_default_environment = env;
372                         else
373                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
374                 } else
375                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
376
377         } else if (startswith(word, "systemd.") ||
378                    (in_initrd() && startswith(word, "rd.systemd."))) {
379
380                 const char *c;
381
382                 /* Ignore systemd.journald.xyz and friends */
383                 c = word;
384                 if (startswith(c, "rd."))
385                         c += 3;
386                 if (startswith(c, "systemd."))
387                         c += 8;
388                 if (c[strcspn(c, ".=")] != '.')  {
389
390                         log_warning("Unknown kernel switch %s. Ignoring.", word);
391
392                         log_info("Supported kernel switches:\n"
393                                  "systemd.unit=UNIT                        Default unit to start\n"
394                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
395                                  "systemd.dump_core=0|1                    Dump core on crash\n"
396                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
397                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
398                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
399                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
400                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
401                                  "                                         Log target\n"
402                                  "systemd.log_level=LEVEL                  Log level\n"
403                                  "systemd.log_color=0|1                    Highlight important log messages\n"
404                                  "systemd.log_location=0|1                 Include code location in log messages\n"
405                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
406                                  "                                         Set default log output for services\n"
407                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
408                                  "                                         Set default log error output for services\n"
409                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
410                 }
411
412         } else if (streq(word, "quiet"))
413                 arg_show_status = false;
414         else if (streq(word, "debug")) {
415                 /* Log to kmsg, the journal socket will fill up before the
416                  * journal is started and tools running during that time
417                  * will block with every log message for for 60 seconds,
418                  * before they give up. */
419                 log_set_max_level(LOG_DEBUG);
420                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG);
421         } else if (!in_initrd()) {
422                 unsigned i;
423
424                 /* SysV compatibility */
425                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
426                         if (streq(word, rlmap[i]))
427                                 return set_default_unit(rlmap[i+1]);
428         }
429
430         return 0;
431 }
432
/*
 * Generate a config-parser callback called `name` that passes the
 * right-hand side of an assignment to `func` (a function taking a string
 * and returning a negative value on failure).
 *
 * A failure is reported through log_syntax() with `descr` naming the kind
 * of value. The generated callback always returns 0.
 */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        /* space before the quoted value, so that */  \
                        /* descr and value do not run together    */  \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
464
465 static int config_parse_cpu_affinity2(const char *unit,
466                                       const char *filename,
467                                       unsigned line,
468                                       const char *section,
469                                       unsigned section_line,
470                                       const char *lvalue,
471                                       int ltype,
472                                       const char *rvalue,
473                                       void *data,
474                                       void *userdata) {
475
476         char *w;
477         size_t l;
478         char *state;
479         cpu_set_t *c = NULL;
480         unsigned ncpus = 0;
481
482         assert(filename);
483         assert(lvalue);
484         assert(rvalue);
485
486         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
487                 char *t;
488                 int r;
489                 unsigned cpu;
490
491                 if (!(t = strndup(w, l)))
492                         return log_oom();
493
494                 r = safe_atou(t, &cpu);
495                 free(t);
496
497                 if (!c)
498                         if (!(c = cpu_set_malloc(&ncpus)))
499                                 return log_oom();
500
501                 if (r < 0 || cpu >= ncpus) {
502                         log_syntax(unit, LOG_ERR, filename, line, -r,
503                                    "Failed to parse CPU affinity '%s'", rvalue);
504                         CPU_FREE(c);
505                         return -EBADMSG;
506                 }
507
508                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
509         }
510
511         if (c) {
512                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
513                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
514
515                 CPU_FREE(c);
516         }
517
518         return 0;
519 }
520
/* Free a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        size_t k;

        if (l == NULL)
                return;

        for (k = 0; l[k] != NULL; k++)
                strv_free(l[k]);

        free(l);
}
532
533 static void free_join_controllers(void) {
534         strv_free_free(arg_join_controllers);
535         arg_join_controllers = NULL;
536 }
537
538 static int config_parse_join_controllers(const char *unit,
539                                          const char *filename,
540                                          unsigned line,
541                                          const char *section,
542                                          unsigned section_line,
543                                          const char *lvalue,
544                                          int ltype,
545                                          const char *rvalue,
546                                          void *data,
547                                          void *userdata) {
548
549         unsigned n = 0;
550         char *state, *w;
551         size_t length;
552
553         assert(filename);
554         assert(lvalue);
555         assert(rvalue);
556
557         free_join_controllers();
558
559         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
560                 char *s, **l;
561
562                 s = strndup(w, length);
563                 if (!s)
564                         return log_oom();
565
566                 l = strv_split(s, ",");
567                 free(s);
568
569                 strv_uniq(l);
570
571                 if (strv_length(l) <= 1) {
572                         strv_free(l);
573                         continue;
574                 }
575
576                 if (!arg_join_controllers) {
577                         arg_join_controllers = new(char**, 2);
578                         if (!arg_join_controllers) {
579                                 strv_free(l);
580                                 return log_oom();
581                         }
582
583                         arg_join_controllers[0] = l;
584                         arg_join_controllers[1] = NULL;
585
586                         n = 1;
587                 } else {
588                         char ***a;
589                         char ***t;
590
591                         t = new0(char**, n+2);
592                         if (!t) {
593                                 strv_free(l);
594                                 return log_oom();
595                         }
596
597                         n = 0;
598
599                         for (a = arg_join_controllers; *a; a++) {
600
601                                 if (strv_overlap(*a, l)) {
602                                         char **c;
603
604                                         c = strv_merge(*a, l);
605                                         if (!c) {
606                                                 strv_free(l);
607                                                 strv_free_free(t);
608                                                 return log_oom();
609                                         }
610
611                                         strv_free(l);
612                                         l = c;
613                                 } else {
614                                         char **c;
615
616                                         c = strv_copy(*a);
617                                         if (!c) {
618                                                 strv_free(l);
619                                                 strv_free_free(t);
620                                                 return log_oom();
621                                         }
622
623                                         t[n++] = c;
624                                 }
625                         }
626
627                         t[n++] = strv_uniq(l);
628
629                         strv_free_free(arg_join_controllers);
630                         arg_join_controllers = t;
631                 }
632         }
633
634         return 0;
635 }
636
637 static int parse_config_file(void) {
638
639         const ConfigTableItem items[] = {
640                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
641                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
642                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
643                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
644                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
645                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
646                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
647                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
648                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
649                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
650                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
651                 { "Manager", "DefaultTimeoutStartSec", config_parse_sec,         0, &arg_default_timeout_start_usec },
652                 { "Manager", "DefaultTimeoutStopSec", config_parse_sec,          0, &arg_default_timeout_stop_usec  },
653                 { "Manager", "DefaultRestartSec",     config_parse_sec,          0, &arg_default_restart_usec  },
654                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,      0, &arg_default_start_limit_interval },
655                 { "Manager", "DefaultStartLimitBurst", config_parse_unsigned,    0, &arg_default_start_limit_burst },
656                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
657                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
658                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
659                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
660                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
661                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
662                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
663                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
664                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
665                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
666                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
667                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
668                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
669                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
670                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
671                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
672                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
673                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
674                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
675                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
676                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
677                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
678                 { NULL, NULL, NULL, 0, NULL }
679         };
680
681         _cleanup_fclose_ FILE *f;
682         const char *fn;
683         int r;
684
685         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
686         f = fopen(fn, "re");
687         if (!f) {
688                 if (errno == ENOENT)
689                         return 0;
690
691                 log_warning("Failed to open configuration file '%s': %m", fn);
692                 return 0;
693         }
694
695         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
696         if (r < 0)
697                 log_warning("Failed to parse configuration file: %s", strerror(-r));
698
699         return 0;
700 }
701
702 static int parse_proc_cmdline(void) {
703         _cleanup_free_ char *line = NULL;
704         char *w, *state;
705         size_t l;
706         int r;
707
708         r = proc_cmdline(&line);
709         if (r < 0)
710                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
711         if (r <= 0)
712                 return 0;
713
714         FOREACH_WORD_QUOTED(w, l, line, state) {
715                 _cleanup_free_ char *word;
716
717                 word = strndup(w, l);
718                 if (!word)
719                         return log_oom();
720
721                 r = parse_proc_cmdline_word(word);
722                 if (r < 0) {
723                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
724                         return r;
725                 }
726         }
727
728         return 0;
729 }
730
731 static int parse_argv(int argc, char *argv[]) {
732
733         enum {
734                 ARG_LOG_LEVEL = 0x100,
735                 ARG_LOG_TARGET,
736                 ARG_LOG_COLOR,
737                 ARG_LOG_LOCATION,
738                 ARG_UNIT,
739                 ARG_SYSTEM,
740                 ARG_USER,
741                 ARG_TEST,
742                 ARG_VERSION,
743                 ARG_DUMP_CONFIGURATION_ITEMS,
744                 ARG_DUMP_CORE,
745                 ARG_CRASH_SHELL,
746                 ARG_CONFIRM_SPAWN,
747                 ARG_SHOW_STATUS,
748                 ARG_DESERIALIZE,
749                 ARG_SWITCHED_ROOT,
750                 ARG_DEFAULT_STD_OUTPUT,
751                 ARG_DEFAULT_STD_ERROR
752         };
753
754         static const struct option options[] = {
755                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
756                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
757                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
758                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
759                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
760                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
761                 { "user",                     no_argument,       NULL, ARG_USER                     },
762                 { "test",                     no_argument,       NULL, ARG_TEST                     },
763                 { "help",                     no_argument,       NULL, 'h'                          },
764                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
765                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
766                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
767                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
768                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
769                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
770                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
771                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
772                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
773                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
774                 { NULL,                       0,                 NULL, 0                            }
775         };
776
777         int c, r;
778
779         assert(argc >= 1);
780         assert(argv);
781
782         if (getpid() == 1)
783                 opterr = 0;
784
785         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
786
787                 switch (c) {
788
789                 case ARG_LOG_LEVEL:
790                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
791                                 log_error("Failed to parse log level %s.", optarg);
792                                 return r;
793                         }
794
795                         break;
796
797                 case ARG_LOG_TARGET:
798
799                         if ((r = log_set_target_from_string(optarg)) < 0) {
800                                 log_error("Failed to parse log target %s.", optarg);
801                                 return r;
802                         }
803
804                         break;
805
806                 case ARG_LOG_COLOR:
807
808                         if (optarg) {
809                                 if ((r = log_show_color_from_string(optarg)) < 0) {
810                                         log_error("Failed to parse log color setting %s.", optarg);
811                                         return r;
812                                 }
813                         } else
814                                 log_show_color(true);
815
816                         break;
817
818                 case ARG_LOG_LOCATION:
819
820                         if (optarg) {
821                                 if ((r = log_show_location_from_string(optarg)) < 0) {
822                                         log_error("Failed to parse log location setting %s.", optarg);
823                                         return r;
824                                 }
825                         } else
826                                 log_show_location(true);
827
828                         break;
829
830                 case ARG_DEFAULT_STD_OUTPUT:
831
832                         if ((r = exec_output_from_string(optarg)) < 0) {
833                                 log_error("Failed to parse default standard output setting %s.", optarg);
834                                 return r;
835                         } else
836                                 arg_default_std_output = r;
837                         break;
838
839                 case ARG_DEFAULT_STD_ERROR:
840
841                         if ((r = exec_output_from_string(optarg)) < 0) {
842                                 log_error("Failed to parse default standard error output setting %s.", optarg);
843                                 return r;
844                         } else
845                                 arg_default_std_error = r;
846                         break;
847
848                 case ARG_UNIT:
849
850                         if ((r = set_default_unit(optarg)) < 0) {
851                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
852                                 return r;
853                         }
854
855                         break;
856
857                 case ARG_SYSTEM:
858                         arg_running_as = SYSTEMD_SYSTEM;
859                         break;
860
861                 case ARG_USER:
862                         arg_running_as = SYSTEMD_USER;
863                         break;
864
865                 case ARG_TEST:
866                         arg_action = ACTION_TEST;
867                         break;
868
869                 case ARG_VERSION:
870                         arg_action = ACTION_VERSION;
871                         break;
872
873                 case ARG_DUMP_CONFIGURATION_ITEMS:
874                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
875                         break;
876
877                 case ARG_DUMP_CORE:
878                         r = optarg ? parse_boolean(optarg) : 1;
879                         if (r < 0) {
880                                 log_error("Failed to parse dump core boolean %s.", optarg);
881                                 return r;
882                         }
883                         arg_dump_core = r;
884                         break;
885
886                 case ARG_CRASH_SHELL:
887                         r = optarg ? parse_boolean(optarg) : 1;
888                         if (r < 0) {
889                                 log_error("Failed to parse crash shell boolean %s.", optarg);
890                                 return r;
891                         }
892                         arg_crash_shell = r;
893                         break;
894
895                 case ARG_CONFIRM_SPAWN:
896                         r = optarg ? parse_boolean(optarg) : 1;
897                         if (r < 0) {
898                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
899                                 return r;
900                         }
901                         arg_confirm_spawn = r;
902                         break;
903
904                 case ARG_SHOW_STATUS:
905                         r = optarg ? parse_boolean(optarg) : 1;
906                         if (r < 0) {
907                                 log_error("Failed to parse show status boolean %s.", optarg);
908                                 return r;
909                         }
910                         arg_show_status = r;
911                         break;
912
913                 case ARG_DESERIALIZE: {
914                         int fd;
915                         FILE *f;
916
917                         r = safe_atoi(optarg, &fd);
918                         if (r < 0 || fd < 0) {
919                                 log_error("Failed to parse deserialize option %s.", optarg);
920                                 return r < 0 ? r : -EINVAL;
921                         }
922
923                         fd_cloexec(fd, true);
924
925                         f = fdopen(fd, "r");
926                         if (!f) {
927                                 log_error("Failed to open serialization fd: %m");
928                                 return -errno;
929                         }
930
931                         if (serialization)
932                                 fclose(serialization);
933
934                         serialization = f;
935
936                         break;
937                 }
938
939                 case ARG_SWITCHED_ROOT:
940                         arg_switched_root = true;
941                         break;
942
943                 case 'h':
944                         arg_action = ACTION_HELP;
945                         break;
946
947                 case 'D':
948                         log_set_max_level(LOG_DEBUG);
949                         break;
950
951                 case 'b':
952                 case 's':
953                 case 'z':
954                         /* Just to eat away the sysvinit kernel
955                          * cmdline args without getopt() error
956                          * messages that we'll parse in
957                          * parse_proc_cmdline_word() or ignore. */
958
959                 case '?':
960                 default:
961                         if (getpid() != 1) {
962                                 log_error("Unknown option code %c", c);
963                                 return -EINVAL;
964                         }
965
966                         break;
967                 }
968
969         if (optind < argc && getpid() != 1) {
970                 /* Hmm, when we aren't run as init system
971                  * let's complain about excess arguments */
972
973                 log_error("Excess arguments.");
974                 return -EINVAL;
975         }
976
977         if (detect_container(NULL) > 0) {
978                 char **a;
979
980                 /* All /proc/cmdline arguments the kernel didn't
981                  * understand it passed to us. We're not really
982                  * interested in that usually since /proc/cmdline is
983                  * more interesting and complete. With one exception:
984                  * if we are run in a container /proc/cmdline is not
985                  * relevant for the container, hence we rely on argv[]
986                  * instead. */
987
988                 for (a = argv; a < argv + argc; a++) {
989                         r = parse_proc_cmdline_word(*a);
990                         if (r < 0) {
991                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
992                                 return r;
993                         }
994                 }
995         }
996
997         return 0;
998 }
999
1000 static int help(void) {
1001
1002         printf("%s [OPTIONS...]\n\n"
1003                "Starts up and maintains the system or user services.\n\n"
1004                "  -h --help                      Show this help\n"
1005                "     --test                      Determine startup sequence, dump it and exit\n"
1006                "     --dump-configuration-items  Dump understood unit configuration items\n"
1007                "     --unit=UNIT                 Set default unit\n"
1008                "     --system                    Run a system instance, even if PID != 1\n"
1009                "     --user                      Run a user instance\n"
1010                "     --dump-core[=0|1]           Dump core on crash\n"
1011                "     --crash-shell[=0|1]         Run shell on crash\n"
1012                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1013                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1014                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1015                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1016                "     --log-color[=0|1]           Highlight important log messages\n"
1017                "     --log-location[=0|1]        Include code location in log messages\n"
1018                "     --default-standard-output=  Set default standard output for services\n"
1019                "     --default-standard-error=   Set default standard error output for services\n",
1020                program_invocation_short_name);
1021
1022         return 0;
1023 }
1024
1025 static int version(void) {
1026         puts(PACKAGE_STRING);
1027         puts(SYSTEMD_FEATURES);
1028
1029         return 0;
1030 }
1031
1032 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1033         FILE *f = NULL;
1034         FDSet *fds = NULL;
1035         int r;
1036
1037         assert(m);
1038         assert(_f);
1039         assert(_fds);
1040
1041         r = manager_open_serialization(m, &f);
1042         if (r < 0) {
1043                 log_error("Failed to create serialization file: %s", strerror(-r));
1044                 goto fail;
1045         }
1046
1047         /* Make sure nothing is really destructed when we shut down */
1048         m->n_reloading ++;
1049         bus_manager_send_reloading(m, true);
1050
1051         fds = fdset_new();
1052         if (!fds) {
1053                 r = -ENOMEM;
1054                 log_error("Failed to allocate fd set: %s", strerror(-r));
1055                 goto fail;
1056         }
1057
1058         r = manager_serialize(m, f, fds, switching_root);
1059         if (r < 0) {
1060                 log_error("Failed to serialize state: %s", strerror(-r));
1061                 goto fail;
1062         }
1063
1064         if (fseeko(f, 0, SEEK_SET) < 0) {
1065                 log_error("Failed to rewind serialization fd: %m");
1066                 goto fail;
1067         }
1068
1069         r = fd_cloexec(fileno(f), false);
1070         if (r < 0) {
1071                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1072                 goto fail;
1073         }
1074
1075         r = fdset_cloexec(fds, false);
1076         if (r < 0) {
1077                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1078                 goto fail;
1079         }
1080
1081         *_f = f;
1082         *_fds = fds;
1083
1084         return 0;
1085
1086 fail:
1087         fdset_free(fds);
1088
1089         if (f)
1090                 fclose(f);
1091
1092         return r;
1093 }
1094
1095 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1096         struct rlimit nl;
1097         int r;
1098
1099         assert(saved_rlimit);
1100
1101         /* Save the original RLIMIT_NOFILE so that we can reset it
1102          * later when transitioning from the initrd to the main
1103          * systemd or suchlike. */
1104         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1105                 log_error("Reading RLIMIT_NOFILE failed: %m");
1106                 return -errno;
1107         }
1108
1109         /* Make sure forked processes get the default kernel setting */
1110         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1111                 struct rlimit *rl;
1112
1113                 rl = newdup(struct rlimit, saved_rlimit, 1);
1114                 if (!rl)
1115                         return log_oom();
1116
1117                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1118         }
1119
1120         /* Bump up the resource limit for ourselves substantially */
1121         nl.rlim_cur = nl.rlim_max = 64*1024;
1122         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1123         if (r < 0) {
1124                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1125                 return r;
1126         }
1127
1128         return 0;
1129 }
1130
/* Logs a warning unless /etc/mtab is a symlink whose target is exactly
 * "/proc/self/mounts" or "/proc/mounts". */
static void test_mtab(void) {
        char *target;
        bool ok = false;

        /* readlink_malloc() failing covers both a missing file and a
         * file that is not a symlink; either way we warn below */
        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                ok = streq(target, "/proc/self/mounts") || streq(target, "/proc/mounts");
                free(target);
        }

        if (ok)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1150
/* Logs a warning if /usr looks like a separate file system that has not
 * been mounted yet. The check used for that is whether dir_is_empty()
 * returns a positive value for /usr (presumably meaning "is empty";
 * errors and a populated directory both suppress the warning). */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1162
/* If /proc/cgroups is not accessible, logs a warning about the missing
 * control group support and then delays for 10 seconds before
 * returning. Does nothing otherwise. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                sleep(10);
        }
}
1177
1178 static int initialize_join_controllers(void) {
1179         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1180          * + "net_prio". We'd like to add "cpuset" to the mix, but
1181          * "cpuset" does't really work for groups with no initialized
1182          * attributes. */
1183
1184         arg_join_controllers = new(char**, 3);
1185         if (!arg_join_controllers)
1186                 return -ENOMEM;
1187
1188         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1189         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1190         arg_join_controllers[2] = NULL;
1191
1192         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1193                 free_join_controllers();
1194                 return -ENOMEM;
1195         }
1196
1197         return 0;
1198 }
1199
1200 int main(int argc, char *argv[]) {
1201         Manager *m = NULL;
1202         int r, retval = EXIT_FAILURE;
1203         usec_t before_startup, after_startup;
1204         char timespan[FORMAT_TIMESPAN_MAX];
1205         FDSet *fds = NULL;
1206         bool reexecute = false;
1207         const char *shutdown_verb = NULL;
1208         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1209         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1210         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1211         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1212         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1213         static char systemd[] = "systemd";
1214         bool skip_setup = false;
1215         int j;
1216         bool loaded_policy = false;
1217         bool arm_reboot_watchdog = false;
1218         bool queue_default_job = false;
1219         char *switch_root_dir = NULL, *switch_root_init = NULL;
1220         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1221
1222 #ifdef HAVE_SYSV_COMPAT
1223         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1224                 /* This is compatibility support for SysV, where
1225                  * calling init as a user is identical to telinit. */
1226
1227                 errno = -ENOENT;
1228                 execv(SYSTEMCTL_BINARY_PATH, argv);
1229                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1230                 return 1;
1231         }
1232 #endif
1233
1234         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1235         dual_timestamp_get(&userspace_timestamp);
1236
1237         /* Determine if this is a reexecution or normal bootup. We do
1238          * the full command line parsing much later, so let's just
1239          * have a quick peek here. */
1240         if (strv_find(argv+1, "--deserialize"))
1241                 skip_setup = true;
1242
1243         /* If we have switched root, do all the special setup
1244          * things */
1245         if (strv_find(argv+1, "--switched-root"))
1246                 skip_setup = false;
1247
1248         /* If we get started via the /sbin/init symlink then we are
1249            called 'init'. After a subsequent reexecution we are then
1250            called 'systemd'. That is confusing, hence let's call us
1251            systemd right-away. */
1252         program_invocation_short_name = systemd;
1253         prctl(PR_SET_NAME, systemd);
1254
1255         saved_argv = argv;
1256         saved_argc = argc;
1257
1258         log_show_color(isatty(STDERR_FILENO) > 0);
1259
1260         /* Disable the umask logic */
1261         if (getpid() == 1)
1262                 umask(0);
1263
1264         if (getpid() == 1 && detect_container(NULL) <= 0) {
1265
1266                 /* Running outside of a container as PID 1 */
1267                 arg_running_as = SYSTEMD_SYSTEM;
1268                 make_null_stdio();
1269                 log_set_target(LOG_TARGET_KMSG);
1270                 log_open();
1271
1272                 if (in_initrd())
1273                         initrd_timestamp = userspace_timestamp;
1274
1275                 if (!skip_setup) {
1276                         mount_setup_early();
1277                         dual_timestamp_get(&security_start_timestamp);
1278                         if (selinux_setup(&loaded_policy) < 0)
1279                                 goto finish;
1280                         if (ima_setup() < 0)
1281                                 goto finish;
1282                         if (smack_setup() < 0)
1283                                 goto finish;
1284                         dual_timestamp_get(&security_finish_timestamp);
1285                 }
1286
1287                 if (label_init(NULL) < 0)
1288                         goto finish;
1289
1290                 if (!skip_setup) {
1291                         if (hwclock_is_localtime() > 0) {
1292                                 int min;
1293
1294                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1295                                 r = hwclock_set_timezone(&min);
1296                                 if (r < 0)
1297                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1298                                 else
1299                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1300                         } else if (!in_initrd()) {
1301                                 /*
1302                                  * Do dummy first-time call to seal the kernel's time warp magic
1303                                  *
1304                                  * Do not call this from inside the initrd. The initrd might not
1305                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1306                                  * that way. In such case, we need to delay the time-warp or the sealing
1307                                  * until we reach the real system.
1308                                  */
1309                                 hwclock_reset_timezone();
1310
1311                                 /* Tell the kernel our timezone */
1312                                 r = hwclock_set_timezone(NULL);
1313                                 if (r < 0)
1314                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1315                         }
1316                 }
1317
1318                 /* Set the default for later on, but don't actually
1319                  * open the logs like this for now. Note that if we
1320                  * are transitioning from the initrd there might still
1321                  * be journal fd open, and we shouldn't attempt
1322                  * opening that before we parsed /proc/cmdline which
1323                  * might redirect output elsewhere. */
1324                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1325
1326         } else if (getpid() == 1) {
1327                 /* Running inside a container, as PID 1 */
1328                 arg_running_as = SYSTEMD_SYSTEM;
1329                 log_set_target(LOG_TARGET_CONSOLE);
1330                 log_close_console(); /* force reopen of /dev/console */
1331                 log_open();
1332
1333                 /* For the later on, see above... */
1334                 log_set_target(LOG_TARGET_JOURNAL);
1335
1336                 /* clear the kernel timestamp,
1337                  * because we are in a container */
1338                 kernel_timestamp.monotonic = 0ULL;
1339                 kernel_timestamp.realtime = 0ULL;
1340
1341         } else {
1342                 /* Running as user instance */
1343                 arg_running_as = SYSTEMD_USER;
1344                 log_set_target(LOG_TARGET_AUTO);
1345                 log_open();
1346
1347                 /* clear the kernel timestamp,
1348                  * because we are not PID 1 */
1349                 kernel_timestamp.monotonic = 0ULL;
1350                 kernel_timestamp.realtime = 0ULL;
1351         }
1352
1353         /* Initialize default unit */
1354         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1355         if (r < 0) {
1356                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1357                 goto finish;
1358         }
1359
1360         r = initialize_join_controllers();
1361         if (r < 0)
1362                 goto finish;
1363
1364         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1365          * /proc/$PID/fd is available. */
1366         if (getpid() == 1) {
1367                 r = mount_setup(loaded_policy);
1368                 if (r < 0)
1369                         goto finish;
1370         }
1371
1372         /* Reset all signal handlers. */
1373         assert_se(reset_all_signal_handlers() == 0);
1374
1375         ignore_signals(SIGNALS_IGNORE, -1);
1376
1377         if (parse_config_file() < 0)
1378                 goto finish;
1379
1380         if (arg_running_as == SYSTEMD_SYSTEM)
1381                 if (parse_proc_cmdline() < 0)
1382                         goto finish;
1383
1384         log_parse_environment();
1385
1386         if (parse_argv(argc, argv) < 0)
1387                 goto finish;
1388
1389         if (arg_action == ACTION_TEST &&
1390             geteuid() == 0) {
1391                 log_error("Don't run test mode as root.");
1392                 goto finish;
1393         }
1394
1395         if (arg_running_as == SYSTEMD_USER &&
1396             arg_action == ACTION_RUN &&
1397             sd_booted() <= 0) {
1398                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1399                 goto finish;
1400         }
1401
1402         if (arg_running_as == SYSTEMD_SYSTEM &&
1403             arg_action == ACTION_RUN &&
1404             running_in_chroot() > 0) {
1405                 log_error("Cannot be run in a chroot() environment.");
1406                 goto finish;
1407         }
1408
1409         if (arg_action == ACTION_HELP) {
1410                 retval = help();
1411                 goto finish;
1412         } else if (arg_action == ACTION_VERSION) {
1413                 retval = version();
1414                 goto finish;
1415         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1416                 unit_dump_config_items(stdout);
1417                 retval = EXIT_SUCCESS;
1418                 goto finish;
1419         } else if (arg_action == ACTION_DONE) {
1420                 retval = EXIT_SUCCESS;
1421                 goto finish;
1422         }
1423
1424         if (arg_running_as == SYSTEMD_USER &&
1425             !getenv("XDG_RUNTIME_DIR")) {
1426                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1427                 goto finish;
1428         }
1429
1430         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1431
1432         /* Close logging fds, in order not to confuse fdset below */
1433         log_close();
1434
1435         /* Remember open file descriptors for later deserialization */
1436         r = fdset_new_fill(&fds);
1437         if (r < 0) {
1438                 log_error("Failed to allocate fd set: %s", strerror(-r));
1439                 goto finish;
1440         } else
1441                 fdset_cloexec(fds, true);
1442
1443         if (serialization)
1444                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1445
1446         if (arg_running_as == SYSTEMD_SYSTEM)
1447                 /* Become a session leader if we aren't one yet. */
1448                 setsid();
1449
1450         /* Move out of the way, so that we won't block unmounts */
1451         assert_se(chdir("/")  == 0);
1452
1453         /* Reset the console, but only if this is really init and we
1454          * are freshly booted */
1455         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1456                 console_setup(getpid() == 1 && !skip_setup);
1457
1458         /* Open the logging devices, if possible and necessary */
1459         log_open();
1460
1461         /* Make sure we leave a core dump without panicking the
1462          * kernel. */
1463         if (getpid() == 1) {
1464                 install_crash_handler();
1465
1466                 r = mount_cgroup_controllers(arg_join_controllers);
1467                 if (r < 0)
1468                         goto finish;
1469         }
1470
1471         if (arg_running_as == SYSTEMD_SYSTEM) {
1472                 const char *virtualization = NULL;
1473
1474                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1475
1476                 detect_virtualization(&virtualization);
1477                 if (virtualization)
1478                         log_info("Detected virtualization '%s'.", virtualization);
1479
1480                 if (in_initrd())
1481                         log_info("Running in initial RAM disk.");
1482
1483         } else
1484                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1485
1486         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1487                 if (arg_show_status || plymouth_running())
1488                         status_welcome();
1489
1490 #ifdef HAVE_KMOD
1491                 if (detect_container(NULL) <= 0)
1492                         kmod_setup();
1493 #endif
1494                 hostname_setup();
1495                 machine_id_setup();
1496                 loopback_setup();
1497
1498                 test_mtab();
1499                 test_usr();
1500                 test_cgroups();
1501         }
1502
1503         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1504                 watchdog_set_timeout(&arg_runtime_watchdog);
1505
1506         if (arg_timer_slack_nsec != (nsec_t) -1)
1507                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1508                         log_error("Failed to adjust timer slack: %m");
1509
1510         if (arg_capability_bounding_set_drop) {
1511                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1512                 if (r < 0) {
1513                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1514                         goto finish;
1515                 }
1516                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1517                 if (r < 0) {
1518                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1519                         goto finish;
1520                 }
1521         }
1522
1523         if (arg_running_as == SYSTEMD_USER) {
1524                 /* Become reaper of our children */
1525                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1526                         log_warning("Failed to make us a subreaper: %m");
1527                         if (errno == EINVAL)
1528                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1529                 }
1530         }
1531
1532         if (arg_running_as == SYSTEMD_SYSTEM)
1533                 bump_rlimit_nofile(&saved_rlimit_nofile);
1534
1535         r = manager_new(arg_running_as, &m);
1536         if (r < 0) {
1537                 log_error("Failed to allocate manager object: %s", strerror(-r));
1538                 goto finish;
1539         }
1540
1541         m->confirm_spawn = arg_confirm_spawn;
1542         m->default_std_output = arg_default_std_output;
1543         m->default_std_error = arg_default_std_error;
1544         m->default_restart_usec = arg_default_restart_usec;
1545         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1546         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1547         m->default_start_limit_interval = arg_default_start_limit_interval;
1548         m->default_start_limit_burst = arg_default_start_limit_burst;
1549         m->runtime_watchdog = arg_runtime_watchdog;
1550         m->shutdown_watchdog = arg_shutdown_watchdog;
1551         m->userspace_timestamp = userspace_timestamp;
1552         m->kernel_timestamp = kernel_timestamp;
1553         m->initrd_timestamp = initrd_timestamp;
1554         m->security_start_timestamp = security_start_timestamp;
1555         m->security_finish_timestamp = security_finish_timestamp;
1556
1557         manager_set_default_rlimits(m, arg_default_rlimit);
1558
1559         if (arg_default_environment)
1560                 manager_environment_add(m, NULL, arg_default_environment);
1561
1562         manager_set_show_status(m, arg_show_status);
1563
1564         /* Remember whether we should queue the default job */
1565         queue_default_job = !serialization || arg_switched_root;
1566
1567         before_startup = now(CLOCK_MONOTONIC);
1568
1569         r = manager_startup(m, serialization, fds);
1570         if (r < 0)
1571                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1572
1573         /* This will close all file descriptors that were opened, but
1574          * not claimed by any unit. */
1575         fdset_free(fds);
1576         fds = NULL;
1577
1578         if (serialization) {
1579                 fclose(serialization);
1580                 serialization = NULL;
1581         }
1582
1583         if (queue_default_job) {
1584                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1585                 Unit *target = NULL;
1586                 Job *default_unit_job;
1587
1588                 log_debug("Activating default unit: %s", arg_default_unit);
1589
1590                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1591                 if (r < 0)
1592                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1593                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1594                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1595                 else if (target->load_state == UNIT_MASKED)
1596                         log_error("Default target masked.");
1597
1598                 if (!target || target->load_state != UNIT_LOADED) {
1599                         log_info("Trying to load rescue target...");
1600
1601                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1602                         if (r < 0) {
1603                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1604                                 goto finish;
1605                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1606                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1607                                 goto finish;
1608                         } else if (target->load_state == UNIT_MASKED) {
1609                                 log_error("Rescue target masked.");
1610                                 goto finish;
1611                         }
1612                 }
1613
1614                 assert(target->load_state == UNIT_LOADED);
1615
1616                 if (arg_action == ACTION_TEST) {
1617                         printf("-> By units:\n");
1618                         manager_dump_units(m, stdout, "\t");
1619                 }
1620
1621                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1622                 if (r == -EPERM) {
1623                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1624
1625                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1626                         if (r < 0) {
1627                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1628                                 goto finish;
1629                         }
1630                 } else if (r < 0) {
1631                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1632                         goto finish;
1633                 }
1634
1635                 m->default_unit_job_id = default_unit_job->id;
1636
1637                 after_startup = now(CLOCK_MONOTONIC);
1638                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1639                          "Loaded units and determined initial transaction in %s.",
1640                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1641
1642                 if (arg_action == ACTION_TEST) {
1643                         printf("-> By jobs:\n");
1644                         manager_dump_jobs(m, stdout, "\t");
1645                         retval = EXIT_SUCCESS;
1646                         goto finish;
1647                 }
1648         }
1649
1650         for (;;) {
1651                 r = manager_loop(m);
1652                 if (r < 0) {
1653                         log_error("Failed to run mainloop: %s", strerror(-r));
1654                         goto finish;
1655                 }
1656
1657                 switch (m->exit_code) {
1658
1659                 case MANAGER_EXIT:
1660                         retval = EXIT_SUCCESS;
1661                         log_debug("Exit.");
1662                         goto finish;
1663
1664                 case MANAGER_RELOAD:
1665                         log_info("Reloading.");
1666                         r = manager_reload(m);
1667                         if (r < 0)
1668                                 log_error("Failed to reload: %s", strerror(-r));
1669                         break;
1670
1671                 case MANAGER_REEXECUTE:
1672
1673                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1674                                 goto finish;
1675
1676                         reexecute = true;
1677                         log_notice("Reexecuting.");
1678                         goto finish;
1679
1680                 case MANAGER_SWITCH_ROOT:
1681                         /* Steal the switch root parameters */
1682                         switch_root_dir = m->switch_root;
1683                         switch_root_init = m->switch_root_init;
1684                         m->switch_root = m->switch_root_init = NULL;
1685
1686                         if (!switch_root_init)
1687                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1688                                         goto finish;
1689
1690                         reexecute = true;
1691                         log_notice("Switching root.");
1692                         goto finish;
1693
1694                 case MANAGER_REBOOT:
1695                 case MANAGER_POWEROFF:
1696                 case MANAGER_HALT:
1697                 case MANAGER_KEXEC: {
1698                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1699                                 [MANAGER_REBOOT] = "reboot",
1700                                 [MANAGER_POWEROFF] = "poweroff",
1701                                 [MANAGER_HALT] = "halt",
1702                                 [MANAGER_KEXEC] = "kexec"
1703                         };
1704
1705                         assert_se(shutdown_verb = table[m->exit_code]);
1706                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1707
1708                         log_notice("Shutting down.");
1709                         goto finish;
1710                 }
1711
1712                 default:
1713                         assert_not_reached("Unknown exit code.");
1714                 }
1715         }
1716
1717 finish:
1718         if (m)
1719                 manager_free(m);
1720
1721         for (j = 0; j < RLIMIT_NLIMITS; j++)
1722                 free(arg_default_rlimit[j]);
1723
1724         free(arg_default_unit);
1725         free_join_controllers();
1726
1727         label_finish();
1728
1729         if (reexecute) {
1730                 const char **args;
1731                 unsigned i, args_size;
1732
1733                 /* Close and disarm the watchdog, so that the new
1734                  * instance can reinitialize it, but doesn't get
1735                  * rebooted while we do that */
1736                 watchdog_close(true);
1737
1738                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1739                  * that the new systemd can pass the kernel default to
1740                  * its child processes */
1741                 if (saved_rlimit_nofile.rlim_cur > 0)
1742                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1743
1744                 if (switch_root_dir) {
1745                         /* Kill all remaining processes from the
1746                          * initrd, but don't wait for them, so that we
1747                          * can handle the SIGCHLD for them after
1748                          * deserializing. */
1749                         broadcast_signal(SIGTERM, false, true);
1750
1751                         /* And switch root */
1752                         r = switch_root(switch_root_dir);
1753                         if (r < 0)
1754                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1755                 }
1756
1757                 args_size = MAX(6, argc+1);
1758                 args = newa(const char*, args_size);
1759
1760                 if (!switch_root_init) {
1761                         char sfd[16];
1762
1763                         /* First try to spawn ourselves with the right
1764                          * path, and with full serialization. We do
1765                          * this only if the user didn't specify an
1766                          * explicit init to spawn. */
1767
1768                         assert(serialization);
1769                         assert(fds);
1770
1771                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1772                         char_array_0(sfd);
1773
1774                         i = 0;
1775                         args[i++] = SYSTEMD_BINARY_PATH;
1776                         if (switch_root_dir)
1777                                 args[i++] = "--switched-root";
1778                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1779                         args[i++] = "--deserialize";
1780                         args[i++] = sfd;
1781                         args[i++] = NULL;
1782
1783                         /* do not pass along the environment we inherit from the kernel or initrd */
1784                         if (switch_root_dir)
1785                                 clearenv();
1786
1787                         assert(i <= args_size);
1788                         execv(args[0], (char* const*) args);
1789                 }
1790
1791                 /* Try the fallback, if there is any, without any
1792                  * serialization. We pass the original argv[] and
1793                  * envp[]. (Well, modulo the ordering changes due to
1794                  * getopt() in argv[], and some cleanups in envp[],
1795                  * but let's hope that doesn't matter.) */
1796
1797                 if (serialization) {
1798                         fclose(serialization);
1799                         serialization = NULL;
1800                 }
1801
1802                 if (fds) {
1803                         fdset_free(fds);
1804                         fds = NULL;
1805                 }
1806
1807                 /* Reopen the console */
1808                 make_console_stdio();
1809
1810                 for (j = 1, i = 1; j < argc; j++)
1811                         args[i++] = argv[j];
1812                 args[i++] = NULL;
1813                 assert(i <= args_size);
1814
1815                 if (switch_root_init) {
1816                         args[0] = switch_root_init;
1817                         execv(args[0], (char* const*) args);
1818                         log_warning("Failed to execute configured init, trying fallback: %m");
1819                 }
1820
1821                 args[0] = "/sbin/init";
1822                 execv(args[0], (char* const*) args);
1823
1824                 if (errno == ENOENT) {
1825                         log_warning("No /sbin/init, trying fallback");
1826
1827                         args[0] = "/bin/sh";
1828                         args[1] = NULL;
1829                         execv(args[0], (char* const*) args);
1830                         log_error("Failed to execute /bin/sh, giving up: %m");
1831                 } else
1832                         log_warning("Failed to execute /sbin/init, giving up: %m");
1833         }
1834
1835         if (serialization)
1836                 fclose(serialization);
1837
1838         if (fds)
1839                 fdset_free(fds);
1840
1841 #ifdef HAVE_VALGRIND_VALGRIND_H
1842         /* If we are PID 1 and running under valgrind, then let's exit
1843          * here explicitly. valgrind will only generate nice output on
1844          * exit(), not on exec(), hence let's do the former not the
1845          * latter here. */
1846         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1847                 return 0;
1848 #endif
1849
1850         if (shutdown_verb) {
1851                 const char * command_line[] = {
1852                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1853                         shutdown_verb,
1854                         NULL
1855                 };
1856                 char **env_block;
1857
1858                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1859                         char e[32];
1860
1861                         /* If we reboot let's set the shutdown
1862                          * watchdog and tell the shutdown binary to
1863                          * repeatedly ping it */
1864                         watchdog_set_timeout(&arg_shutdown_watchdog);
1865                         watchdog_close(false);
1866
1867                         /* Tell the binary how often to ping */
1868                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1869                         char_array_0(e);
1870
1871                         env_block = strv_append(environ, e);
1872                 } else {
1873                         env_block = strv_copy(environ);
1874                         watchdog_close(true);
1875                 }
1876
1877                 /* Avoid the creation of new processes forked by the
1878                  * kernel; at this point, we will not listen to the
1879                  * signals anyway */
1880                 if (detect_container(NULL) <= 0)
1881                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1882
1883                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1884                 free(env_block);
1885                 log_error("Failed to execute shutdown binary, freezing: %m");
1886         }
1887
1888         if (getpid() == 1)
1889                 freeze();
1890
1891         return retval;
1892 }