chiark / gitweb /
core: uninstall cgroup agent only if we are running outside of a container
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dbus/dbus.h>
23
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <sys/wait.h>
33 #include <fcntl.h>
34 #include <sys/prctl.h>
35 #include <sys/mount.h>
36
37 #include "manager.h"
38 #include "log.h"
39 #include "load-fragment.h"
40 #include "fdset.h"
41 #include "special.h"
42 #include "conf-parser.h"
43 #include "dbus-common.h"
44 #include "missing.h"
45 #include "label.h"
46 #include "build.h"
47 #include "strv.h"
48 #include "def.h"
49 #include "virt.h"
50 #include "watchdog.h"
51 #include "path-util.h"
52 #include "switch-root.h"
53 #include "capability.h"
54 #include "killall.h"
55 #include "env-util.h"
56 #include "hwclock.h"
57 #include "sd-daemon.h"
58 #include "sd-messages.h"
59
60 #include "mount-setup.h"
61 #include "loopback-setup.h"
62 #ifdef HAVE_KMOD
63 #include "kmod-setup.h"
64 #endif
65 #include "hostname-setup.h"
66 #include "machine-id-setup.h"
67 #include "locale-setup.h"
68 #include "selinux-setup.h"
69 #include "ima-setup.h"
70 #include "fileio.h"
71 #include "smack-setup.h"
72
73 static enum {
74         ACTION_RUN,
75         ACTION_HELP,
76         ACTION_VERSION,
77         ACTION_TEST,
78         ACTION_DUMP_CONFIGURATION_ITEMS,
79         ACTION_DONE
80 } arg_action = ACTION_RUN;
81
82 static char *arg_default_unit = NULL;
83 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
84
85 static bool arg_dump_core = true;
86 static bool arg_crash_shell = false;
87 static int arg_crash_chvt = -1;
88 static bool arg_confirm_spawn = false;
89 static bool arg_show_status = true;
90 static bool arg_switched_root = false;
91 static char ***arg_join_controllers = NULL;
92 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
93 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
94 static usec_t arg_runtime_watchdog = 0;
95 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
96 static char **arg_default_environment = NULL;
97 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
98 static uint64_t arg_capability_bounding_set_drop = 0;
99 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
100
101 static FILE* serialization = NULL;
102
/* Signal handler that deliberately does nothing. crash() installs it for
 * SIGCHLD before forking the core-dump child it then waits for. */
static void nop_handler(int sig) {
        (void) sig;
}
105
106 _noreturn_ static void crash(int sig) {
107
108         if (getpid() != 1)
109                 /* Pass this on immediately, if this is not PID 1 */
110                 raise(sig);
111         else if (!arg_dump_core)
112                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
113         else {
114                 struct sigaction sa = {
115                         .sa_handler = nop_handler,
116                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
117                 };
118                 pid_t pid;
119
120                 /* We want to wait for the core process, hence let's enable SIGCHLD */
121                 sigaction(SIGCHLD, &sa, NULL);
122
123                 pid = fork();
124                 if (pid < 0)
125                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
126
127                 else if (pid == 0) {
128                         struct rlimit rl = {};
129
130                         /* Enable default signal handler for core dump */
131                         zero(sa);
132                         sa.sa_handler = SIG_DFL;
133                         sigaction(sig, &sa, NULL);
134
135                         /* Don't limit the core dump size */
136                         rl.rlim_cur = RLIM_INFINITY;
137                         rl.rlim_max = RLIM_INFINITY;
138                         setrlimit(RLIMIT_CORE, &rl);
139
140                         /* Just to be sure... */
141                         chdir("/");
142
143                         /* Raise the signal again */
144                         raise(sig);
145
146                         assert_not_reached("We shouldn't be here...");
147                         _exit(1);
148
149                 } else {
150                         siginfo_t status;
151                         int r;
152
153                         /* Order things nicely. */
154                         r = wait_for_terminate(pid, &status);
155                         if (r < 0)
156                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
157                         else if (status.si_code != CLD_DUMPED)
158                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
159                         else
160                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
161                 }
162         }
163
164         if (arg_crash_chvt)
165                 chvt(arg_crash_chvt);
166
167         if (arg_crash_shell) {
168                 struct sigaction sa = {
169                         .sa_handler = SIG_IGN,
170                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
171                 };
172                 pid_t pid;
173
174                 log_info("Executing crash shell in 10s...");
175                 sleep(10);
176
177                 /* Let the kernel reap children for us */
178                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
179
180                 pid = fork();
181                 if (pid < 0)
182                         log_error("Failed to fork off crash shell: %m");
183                 else if (pid == 0) {
184                         make_console_stdio();
185                         execl("/bin/sh", "/bin/sh", NULL);
186
187                         log_error("execl() failed: %m");
188                         _exit(1);
189                 }
190
191                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
192         }
193
194         log_info("Freezing execution.");
195         freeze();
196 }
197
198 static void install_crash_handler(void) {
199         struct sigaction sa = {
200                 .sa_handler = crash,
201                 .sa_flags = SA_NODEFER,
202         };
203
204         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
205 }
206
207 static int console_setup(bool do_reset) {
208         int tty_fd, r;
209
210         /* If we are init, we connect stdin/stdout/stderr to /dev/null
211          * and make sure we don't have a controlling tty. */
212
213         release_terminal();
214
215         if (!do_reset)
216                 return 0;
217
218         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
219         if (tty_fd < 0) {
220                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
221                 return -tty_fd;
222         }
223
224         /* We don't want to force text mode.
225          * plymouth may be showing pictures already from initrd. */
226         r = reset_terminal_fd(tty_fd, false);
227         if (r < 0)
228                 log_error("Failed to reset /dev/console: %s", strerror(-r));
229
230         close_nointr_nofail(tty_fd);
231         return r;
232 }
233
234 static int set_default_unit(const char *u) {
235         char *c;
236
237         assert(u);
238
239         c = strdup(u);
240         if (!c)
241                 return -ENOMEM;
242
243         free(arg_default_unit);
244         arg_default_unit = c;
245
246         return 0;
247 }
248
249 static int parse_proc_cmdline_word(const char *word) {
250
251         static const char * const rlmap[] = {
252                 "emergency", SPECIAL_EMERGENCY_TARGET,
253                 "-b",        SPECIAL_EMERGENCY_TARGET,
254                 "single",    SPECIAL_RESCUE_TARGET,
255                 "-s",        SPECIAL_RESCUE_TARGET,
256                 "s",         SPECIAL_RESCUE_TARGET,
257                 "S",         SPECIAL_RESCUE_TARGET,
258                 "1",         SPECIAL_RESCUE_TARGET,
259                 "2",         SPECIAL_RUNLEVEL2_TARGET,
260                 "3",         SPECIAL_RUNLEVEL3_TARGET,
261                 "4",         SPECIAL_RUNLEVEL4_TARGET,
262                 "5",         SPECIAL_RUNLEVEL5_TARGET,
263         };
264
265         assert(word);
266
267         if (startswith(word, "systemd.unit=")) {
268
269                 if (!in_initrd())
270                         return set_default_unit(word + 13);
271
272         } else if (startswith(word, "rd.systemd.unit=")) {
273
274                 if (in_initrd())
275                         return set_default_unit(word + 16);
276
277         } else if (startswith(word, "systemd.log_target=")) {
278
279                 if (log_set_target_from_string(word + 19) < 0)
280                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
281
282         } else if (startswith(word, "systemd.log_level=")) {
283
284                 if (log_set_max_level_from_string(word + 18) < 0)
285                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
286
287         } else if (startswith(word, "systemd.log_color=")) {
288
289                 if (log_show_color_from_string(word + 18) < 0)
290                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
291
292         } else if (startswith(word, "systemd.log_location=")) {
293
294                 if (log_show_location_from_string(word + 21) < 0)
295                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
296
297         } else if (startswith(word, "systemd.dump_core=")) {
298                 int r;
299
300                 if ((r = parse_boolean(word + 18)) < 0)
301                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
302                 else
303                         arg_dump_core = r;
304
305         } else if (startswith(word, "systemd.crash_shell=")) {
306                 int r;
307
308                 if ((r = parse_boolean(word + 20)) < 0)
309                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
310                 else
311                         arg_crash_shell = r;
312
313         } else if (startswith(word, "systemd.confirm_spawn=")) {
314                 int r;
315
316                 if ((r = parse_boolean(word + 22)) < 0)
317                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
318                 else
319                         arg_confirm_spawn = r;
320
321         } else if (startswith(word, "systemd.crash_chvt=")) {
322                 int k;
323
324                 if (safe_atoi(word + 19, &k) < 0)
325                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
326                 else
327                         arg_crash_chvt = k;
328
329         } else if (startswith(word, "systemd.show_status=")) {
330                 int r;
331
332                 if ((r = parse_boolean(word + 20)) < 0)
333                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
334                 else
335                         arg_show_status = r;
336         } else if (startswith(word, "systemd.default_standard_output=")) {
337                 int r;
338
339                 if ((r = exec_output_from_string(word + 32)) < 0)
340                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
341                 else
342                         arg_default_std_output = r;
343         } else if (startswith(word, "systemd.default_standard_error=")) {
344                 int r;
345
346                 if ((r = exec_output_from_string(word + 31)) < 0)
347                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
348                 else
349                         arg_default_std_error = r;
350         } else if (startswith(word, "systemd.setenv=")) {
351                 _cleanup_free_ char *cenv = NULL;
352                 char *eq;
353                 int r;
354
355                 cenv = strdup(word + 15);
356                 if (!cenv)
357                         return -ENOMEM;
358
359                 eq = strchr(cenv, '=');
360                 if (!eq) {
361                         if (!env_name_is_valid(cenv))
362                                 log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
363                         else  {
364                                 r = unsetenv(cenv);
365                                 if (r < 0)
366                                         log_warning("Unsetting environment variable '%s' failed, ignoring: %m", cenv);
367                         }
368                 } else {
369                         if (!env_assignment_is_valid(cenv))
370                                 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", cenv);
371                         else {
372                                 *eq = 0;
373                                 r = setenv(cenv, eq + 1, 1);
374                                 if (r < 0)
375                                         log_warning("Setting environment variable '%s=%s' failed, ignoring: %m", cenv, eq + 1);
376                         }
377                 }
378
379         } else if (startswith(word, "systemd.") ||
380                    (in_initrd() && startswith(word, "rd.systemd."))) {
381
382                 const char *c;
383
384                 /* Ignore systemd.journald.xyz and friends */
385                 c = word;
386                 if (startswith(c, "rd."))
387                         c += 3;
388                 if (startswith(c, "systemd."))
389                         c += 8;
390                 if (c[strcspn(c, ".=")] != '.')  {
391
392                         log_warning("Unknown kernel switch %s. Ignoring.", word);
393
394                         log_info("Supported kernel switches:\n"
395                                  "systemd.unit=UNIT                        Default unit to start\n"
396                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
397                                  "systemd.dump_core=0|1                    Dump core on crash\n"
398                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
399                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
400                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
401                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
402                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
403                                  "                                         Log target\n"
404                                  "systemd.log_level=LEVEL                  Log level\n"
405                                  "systemd.log_color=0|1                    Highlight important log messages\n"
406                                  "systemd.log_location=0|1                 Include code location in log messages\n"
407                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
408                                  "                                         Set default log output for services\n"
409                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
410                                  "                                         Set default log error output for services\n"
411                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
412                 }
413
414         } else if (streq(word, "quiet"))
415                 arg_show_status = false;
416         else if (streq(word, "debug"))
417                 log_set_max_level(LOG_DEBUG);
418         else if (!in_initrd()) {
419                 unsigned i;
420
421                 /* SysV compatibility */
422                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
423                         if (streq(word, rlmap[i]))
424                                 return set_default_unit(rlmap[i+1]);
425         }
426
427         return 0;
428 }
429
/*
 * Generates a configuration-file parser callback called `name` that feeds
 * the right-hand side of the assignment to func(). If func() returns a
 * negative value the problem is reported via log_syntax() using `descr` to
 * name the setting; the callback itself always returns 0, so a bad value
 * never aborts parsing.
 */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

/* Callbacks for the LogLevel=, LogTarget=, LogColor= and LogLocation=
 * settings listed in parse_config_file(). */
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
460
461
462 static int config_parse_cpu_affinity2(const char *unit,
463                                       const char *filename,
464                                       unsigned line,
465                                       const char *section,
466                                       const char *lvalue,
467                                       int ltype,
468                                       const char *rvalue,
469                                       void *data,
470                                       void *userdata) {
471
472         char *w;
473         size_t l;
474         char *state;
475         cpu_set_t *c = NULL;
476         unsigned ncpus = 0;
477
478         assert(filename);
479         assert(lvalue);
480         assert(rvalue);
481
482         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
483                 char *t;
484                 int r;
485                 unsigned cpu;
486
487                 if (!(t = strndup(w, l)))
488                         return log_oom();
489
490                 r = safe_atou(t, &cpu);
491                 free(t);
492
493                 if (!c)
494                         if (!(c = cpu_set_malloc(&ncpus)))
495                                 return log_oom();
496
497                 if (r < 0 || cpu >= ncpus) {
498                         log_syntax(unit, LOG_ERR, filename, line, -r,
499                                    "Failed to parse CPU affinity '%s'", rvalue);
500                         CPU_FREE(c);
501                         return -EBADMSG;
502                 }
503
504                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
505         }
506
507         if (c) {
508                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
509                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
510
511                 CPU_FREE(c);
512         }
513
514         return 0;
515 }
516
/* Frees a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        char ***cur = l;

        if (!l)
                return;

        while (*cur) {
                strv_free(*cur);
                cur++;
        }

        free(l);
}
528
529 static void free_join_controllers(void) {
530         strv_free_free(arg_join_controllers);
531         arg_join_controllers = NULL;
532 }
533
534 static int config_parse_join_controllers(const char *unit,
535                                          const char *filename,
536                                          unsigned line,
537                                          const char *section,
538                                          const char *lvalue,
539                                          int ltype,
540                                          const char *rvalue,
541                                          void *data,
542                                          void *userdata) {
543
544         unsigned n = 0;
545         char *state, *w;
546         size_t length;
547
548         assert(filename);
549         assert(lvalue);
550         assert(rvalue);
551
552         free_join_controllers();
553
554         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
555                 char *s, **l;
556
557                 s = strndup(w, length);
558                 if (!s)
559                         return log_oom();
560
561                 l = strv_split(s, ",");
562                 free(s);
563
564                 strv_uniq(l);
565
566                 if (strv_length(l) <= 1) {
567                         strv_free(l);
568                         continue;
569                 }
570
571                 if (!arg_join_controllers) {
572                         arg_join_controllers = new(char**, 2);
573                         if (!arg_join_controllers) {
574                                 strv_free(l);
575                                 return log_oom();
576                         }
577
578                         arg_join_controllers[0] = l;
579                         arg_join_controllers[1] = NULL;
580
581                         n = 1;
582                 } else {
583                         char ***a;
584                         char ***t;
585
586                         t = new0(char**, n+2);
587                         if (!t) {
588                                 strv_free(l);
589                                 return log_oom();
590                         }
591
592                         n = 0;
593
594                         for (a = arg_join_controllers; *a; a++) {
595
596                                 if (strv_overlap(*a, l)) {
597                                         char **c;
598
599                                         c = strv_merge(*a, l);
600                                         if (!c) {
601                                                 strv_free(l);
602                                                 strv_free_free(t);
603                                                 return log_oom();
604                                         }
605
606                                         strv_free(l);
607                                         l = c;
608                                 } else {
609                                         char **c;
610
611                                         c = strv_copy(*a);
612                                         if (!c) {
613                                                 strv_free(l);
614                                                 strv_free_free(t);
615                                                 return log_oom();
616                                         }
617
618                                         t[n++] = c;
619                                 }
620                         }
621
622                         t[n++] = strv_uniq(l);
623
624                         strv_free_free(arg_join_controllers);
625                         arg_join_controllers = t;
626                 }
627         }
628
629         return 0;
630 }
631
632 static int parse_config_file(void) {
633
634         const ConfigTableItem items[] = {
635                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
636                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
637                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
638                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
639                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
640                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
641                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
642                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
643                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
644                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
645                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
646                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
647                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
648                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
649                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
650                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
651                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
652                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
653                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
654                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
655                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
656                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
657                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
658                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
659                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
660                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
661                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
662                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
663                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
664                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
665                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
666                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
667                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
668                 { NULL, NULL, NULL, 0, NULL }
669         };
670
671         _cleanup_fclose_ FILE *f;
672         const char *fn;
673         int r;
674
675         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
676         f = fopen(fn, "re");
677         if (!f) {
678                 if (errno == ENOENT)
679                         return 0;
680
681                 log_warning("Failed to open configuration file '%s': %m", fn);
682                 return 0;
683         }
684
685         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
686         if (r < 0)
687                 log_warning("Failed to parse configuration file: %s", strerror(-r));
688
689         return 0;
690 }
691
692 static int parse_proc_cmdline(void) {
693         _cleanup_free_ char *line = NULL;
694         char *w, *state;
695         int r;
696         size_t l;
697
698         /* Don't read /proc/cmdline if we are in a container, since
699          * that is only relevant for the host system */
700         if (detect_container(NULL) > 0)
701                 return 0;
702
703         r = read_one_line_file("/proc/cmdline", &line);
704         if (r < 0) {
705                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
706                 return 0;
707         }
708
709         FOREACH_WORD_QUOTED(w, l, line, state) {
710                 _cleanup_free_ char *word;
711
712                 word = strndup(w, l);
713                 if (!word)
714                         return log_oom();
715
716                 r = parse_proc_cmdline_word(word);
717                 if (r < 0) {
718                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
719                         return r;
720                 }
721         }
722
723         return 0;
724 }
725
726 static int parse_argv(int argc, char *argv[]) {
727
728         enum {
729                 ARG_LOG_LEVEL = 0x100,
730                 ARG_LOG_TARGET,
731                 ARG_LOG_COLOR,
732                 ARG_LOG_LOCATION,
733                 ARG_UNIT,
734                 ARG_SYSTEM,
735                 ARG_USER,
736                 ARG_TEST,
737                 ARG_VERSION,
738                 ARG_DUMP_CONFIGURATION_ITEMS,
739                 ARG_DUMP_CORE,
740                 ARG_CRASH_SHELL,
741                 ARG_CONFIRM_SPAWN,
742                 ARG_SHOW_STATUS,
743                 ARG_DESERIALIZE,
744                 ARG_SWITCHED_ROOT,
745                 ARG_INTROSPECT,
746                 ARG_DEFAULT_STD_OUTPUT,
747                 ARG_DEFAULT_STD_ERROR
748         };
749
750         static const struct option options[] = {
751                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
752                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
753                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
754                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
755                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
756                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
757                 { "user",                     no_argument,       NULL, ARG_USER                     },
758                 { "test",                     no_argument,       NULL, ARG_TEST                     },
759                 { "help",                     no_argument,       NULL, 'h'                          },
760                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
761                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
762                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
763                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
764                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
765                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
766                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
767                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
768                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
769                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
770                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
771                 { NULL,                       0,                 NULL, 0                            }
772         };
773
774         int c, r;
775
776         assert(argc >= 1);
777         assert(argv);
778
779         if (getpid() == 1)
780                 opterr = 0;
781
782         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
783
784                 switch (c) {
785
786                 case ARG_LOG_LEVEL:
787                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
788                                 log_error("Failed to parse log level %s.", optarg);
789                                 return r;
790                         }
791
792                         break;
793
794                 case ARG_LOG_TARGET:
795
796                         if ((r = log_set_target_from_string(optarg)) < 0) {
797                                 log_error("Failed to parse log target %s.", optarg);
798                                 return r;
799                         }
800
801                         break;
802
803                 case ARG_LOG_COLOR:
804
805                         if (optarg) {
806                                 if ((r = log_show_color_from_string(optarg)) < 0) {
807                                         log_error("Failed to parse log color setting %s.", optarg);
808                                         return r;
809                                 }
810                         } else
811                                 log_show_color(true);
812
813                         break;
814
815                 case ARG_LOG_LOCATION:
816
817                         if (optarg) {
818                                 if ((r = log_show_location_from_string(optarg)) < 0) {
819                                         log_error("Failed to parse log location setting %s.", optarg);
820                                         return r;
821                                 }
822                         } else
823                                 log_show_location(true);
824
825                         break;
826
827                 case ARG_DEFAULT_STD_OUTPUT:
828
829                         if ((r = exec_output_from_string(optarg)) < 0) {
830                                 log_error("Failed to parse default standard output setting %s.", optarg);
831                                 return r;
832                         } else
833                                 arg_default_std_output = r;
834                         break;
835
836                 case ARG_DEFAULT_STD_ERROR:
837
838                         if ((r = exec_output_from_string(optarg)) < 0) {
839                                 log_error("Failed to parse default standard error output setting %s.", optarg);
840                                 return r;
841                         } else
842                                 arg_default_std_error = r;
843                         break;
844
845                 case ARG_UNIT:
846
847                         if ((r = set_default_unit(optarg)) < 0) {
848                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
849                                 return r;
850                         }
851
852                         break;
853
854                 case ARG_SYSTEM:
855                         arg_running_as = SYSTEMD_SYSTEM;
856                         break;
857
858                 case ARG_USER:
859                         arg_running_as = SYSTEMD_USER;
860                         break;
861
862                 case ARG_TEST:
863                         arg_action = ACTION_TEST;
864                         break;
865
866                 case ARG_VERSION:
867                         arg_action = ACTION_VERSION;
868                         break;
869
870                 case ARG_DUMP_CONFIGURATION_ITEMS:
871                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
872                         break;
873
874                 case ARG_DUMP_CORE:
875                         r = optarg ? parse_boolean(optarg) : 1;
876                         if (r < 0) {
877                                 log_error("Failed to parse dump core boolean %s.", optarg);
878                                 return r;
879                         }
880                         arg_dump_core = r;
881                         break;
882
883                 case ARG_CRASH_SHELL:
884                         r = optarg ? parse_boolean(optarg) : 1;
885                         if (r < 0) {
886                                 log_error("Failed to parse crash shell boolean %s.", optarg);
887                                 return r;
888                         }
889                         arg_crash_shell = r;
890                         break;
891
892                 case ARG_CONFIRM_SPAWN:
893                         r = optarg ? parse_boolean(optarg) : 1;
894                         if (r < 0) {
895                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
896                                 return r;
897                         }
898                         arg_confirm_spawn = r;
899                         break;
900
901                 case ARG_SHOW_STATUS:
902                         r = optarg ? parse_boolean(optarg) : 1;
903                         if (r < 0) {
904                                 log_error("Failed to parse show status boolean %s.", optarg);
905                                 return r;
906                         }
907                         arg_show_status = r;
908                         break;
909
910                 case ARG_DESERIALIZE: {
911                         int fd;
912                         FILE *f;
913
914                         r = safe_atoi(optarg, &fd);
915                         if (r < 0 || fd < 0) {
916                                 log_error("Failed to parse deserialize option %s.", optarg);
917                                 return r < 0 ? r : -EINVAL;
918                         }
919
920                         fd_cloexec(fd, true);
921
922                         f = fdopen(fd, "r");
923                         if (!f) {
924                                 log_error("Failed to open serialization fd: %m");
925                                 return -errno;
926                         }
927
928                         if (serialization)
929                                 fclose(serialization);
930
931                         serialization = f;
932
933                         break;
934                 }
935
936                 case ARG_SWITCHED_ROOT:
937                         arg_switched_root = true;
938                         break;
939
940                 case ARG_INTROSPECT: {
941                         const char * const * i = NULL;
942
943                         for (i = bus_interface_table; *i; i += 2)
944                                 if (!optarg || streq(i[0], optarg)) {
945                                         fputs(DBUS_INTROSPECT_1_0_XML_DOCTYPE_DECL_NODE
946                                               "<node>\n", stdout);
947                                         fputs(i[1], stdout);
948                                         fputs("</node>\n", stdout);
949
950                                         if (optarg)
951                                                 break;
952                                 }
953
954                         if (!i[0] && optarg)
955                                 log_error("Unknown interface %s.", optarg);
956
957                         arg_action = ACTION_DONE;
958                         break;
959                 }
960
961                 case 'h':
962                         arg_action = ACTION_HELP;
963                         break;
964
965                 case 'D':
966                         log_set_max_level(LOG_DEBUG);
967                         break;
968
969                 case 'b':
970                 case 's':
971                 case 'z':
972                         /* Just to eat away the sysvinit kernel
973                          * cmdline args without getopt() error
974                          * messages that we'll parse in
975                          * parse_proc_cmdline_word() or ignore. */
976
977                 case '?':
978                 default:
979                         if (getpid() != 1) {
980                                 log_error("Unknown option code %c", c);
981                                 return -EINVAL;
982                         }
983
984                         break;
985                 }
986
987         if (optind < argc && getpid() != 1) {
988                 /* Hmm, when we aren't run as init system
989                  * let's complain about excess arguments */
990
991                 log_error("Excess arguments.");
992                 return -EINVAL;
993         }
994
995         if (detect_container(NULL) > 0) {
996                 char **a;
997
998                 /* All /proc/cmdline arguments the kernel didn't
999                  * understand it passed to us. We're not really
1000                  * interested in that usually since /proc/cmdline is
1001                  * more interesting and complete. With one exception:
1002                  * if we are run in a container /proc/cmdline is not
1003                  * relevant for the container, hence we rely on argv[]
1004                  * instead. */
1005
1006                 for (a = argv; a < argv + argc; a++)
1007                         if ((r = parse_proc_cmdline_word(*a)) < 0) {
1008                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
1009                                 return r;
1010                         }
1011         }
1012
1013         return 0;
1014 }
1015
1016 static int help(void) {
1017
1018         printf("%s [OPTIONS...]\n\n"
1019                "Starts up and maintains the system or user services.\n\n"
1020                "  -h --help                      Show this help\n"
1021                "     --test                      Determine startup sequence, dump it and exit\n"
1022                "     --dump-configuration-items  Dump understood unit configuration items\n"
1023                "     --introspect[=INTERFACE]    Extract D-Bus interface data\n"
1024                "     --unit=UNIT                 Set default unit\n"
1025                "     --system                    Run a system instance, even if PID != 1\n"
1026                "     --user                      Run a user instance\n"
1027                "     --dump-core[=0|1]           Dump core on crash\n"
1028                "     --crash-shell[=0|1]         Run shell on crash\n"
1029                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1030                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1031                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1032                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1033                "     --log-color[=0|1]           Highlight important log messages\n"
1034                "     --log-location[=0|1]        Include code location in log messages\n"
1035                "     --default-standard-output=  Set default standard output for services\n"
1036                "     --default-standard-error=   Set default standard error output for services\n",
1037                program_invocation_short_name);
1038
1039         return 0;
1040 }
1041
1042 static int version(void) {
1043         puts(PACKAGE_STRING);
1044         puts(SYSTEMD_FEATURES);
1045
1046         return 0;
1047 }
1048
1049 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1050         FILE *f = NULL;
1051         FDSet *fds = NULL;
1052         int r;
1053
1054         assert(m);
1055         assert(_f);
1056         assert(_fds);
1057
1058         r = manager_open_serialization(m, &f);
1059         if (r < 0) {
1060                 log_error("Failed to create serialization file: %s", strerror(-r));
1061                 goto fail;
1062         }
1063
1064         /* Make sure nothing is really destructed when we shut down */
1065         m->n_reloading ++;
1066         bus_broadcast_reloading(m, true);
1067
1068         fds = fdset_new();
1069         if (!fds) {
1070                 r = -ENOMEM;
1071                 log_error("Failed to allocate fd set: %s", strerror(-r));
1072                 goto fail;
1073         }
1074
1075         r = manager_serialize(m, f, fds, switching_root);
1076         if (r < 0) {
1077                 log_error("Failed to serialize state: %s", strerror(-r));
1078                 goto fail;
1079         }
1080
1081         if (fseeko(f, 0, SEEK_SET) < 0) {
1082                 log_error("Failed to rewind serialization fd: %m");
1083                 goto fail;
1084         }
1085
1086         r = fd_cloexec(fileno(f), false);
1087         if (r < 0) {
1088                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1089                 goto fail;
1090         }
1091
1092         r = fdset_cloexec(fds, false);
1093         if (r < 0) {
1094                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1095                 goto fail;
1096         }
1097
1098         *_f = f;
1099         *_fds = fds;
1100
1101         return 0;
1102
1103 fail:
1104         fdset_free(fds);
1105
1106         if (f)
1107                 fclose(f);
1108
1109         return r;
1110 }
1111
1112 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1113         struct rlimit nl;
1114         int r;
1115
1116         assert(saved_rlimit);
1117
1118         /* Save the original RLIMIT_NOFILE so that we can reset it
1119          * later when transitioning from the initrd to the main
1120          * systemd or suchlike. */
1121         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1122                 log_error("Reading RLIMIT_NOFILE failed: %m");
1123                 return -errno;
1124         }
1125
1126         /* Make sure forked processes get the default kernel setting */
1127         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1128                 struct rlimit *rl;
1129
1130                 rl = newdup(struct rlimit, saved_rlimit, 1);
1131                 if (!rl)
1132                         return log_oom();
1133
1134                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1135         }
1136
1137         /* Bump up the resource limit for ourselves substantially */
1138         nl.rlim_cur = nl.rlim_max = 64*1024;
1139         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1140         if (r < 0) {
1141                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1142                 return r;
1143         }
1144
1145         return 0;
1146 }
1147
1148 static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) {
1149         const char *e;
1150         unsigned long long a, b;
1151
1152         assert(t);
1153
1154         e = getenv("RD_TIMESTAMP");
1155         if (!e)
1156                 return NULL;
1157
1158         if (sscanf(e, "%llu %llu", &a, &b) != 2)
1159                 return NULL;
1160
1161         t->realtime = (usec_t) a;
1162         t->monotonic = (usec_t) b;
1163
1164         return t;
1165 }
1166
/* Warns if /etc/mtab is not a symlink pointing to /proc/self/mounts
 * or /proc/mounts. */
static void test_mtab(void) {
        bool points_to_proc = false;
        char *target;

        /* If the link cannot be read (for example because it is a
         * regular file) we complain as well. */
        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                points_to_proc =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");

                free(target);
        }

        if (points_to_proc)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1186
/* Warns if /usr looks like a separate file system that has not been
 * mounted yet, which is detected by the directory being empty. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs. A non-empty directory
         * and a failure to inspect it are both taken as "fine". */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1198
/* Warns and delays boot-up by ten seconds if /proc/cgroups is not
 * accessible. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {

                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* Give the user a chance to actually read the above */
                sleep(10);
        }
}
1213
1214 static int initialize_join_controllers(void) {
1215         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1216          * + "net_prio". We'd like to add "cpuset" to the mix, but
1217          * "cpuset" does't really work for groups with no initialized
1218          * attributes. */
1219
1220         arg_join_controllers = new(char**, 3);
1221         if (!arg_join_controllers)
1222                 return -ENOMEM;
1223
1224         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1225         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1226         arg_join_controllers[2] = NULL;
1227
1228         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1229                 free_join_controllers();
1230                 return -ENOMEM;
1231         }
1232
1233         return 0;
1234 }
1235
1236 int main(int argc, char *argv[]) {
1237         Manager *m = NULL;
1238         int r, retval = EXIT_FAILURE;
1239         usec_t before_startup, after_startup;
1240         char timespan[FORMAT_TIMESPAN_MAX];
1241         FDSet *fds = NULL;
1242         bool reexecute = false;
1243         const char *shutdown_verb = NULL;
1244         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1245         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1246         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1247         static char systemd[] = "systemd";
1248         bool skip_setup = false;
1249         int j;
1250         bool loaded_policy = false;
1251         bool arm_reboot_watchdog = false;
1252         bool queue_default_job = false;
1253         char *switch_root_dir = NULL, *switch_root_init = NULL;
1254         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1255
1256 #ifdef HAVE_SYSV_COMPAT
1257         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1258                 /* This is compatibility support for SysV, where
1259                  * calling init as a user is identical to telinit. */
1260
1261                 errno = -ENOENT;
1262                 execv(SYSTEMCTL_BINARY_PATH, argv);
1263                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1264                 return 1;
1265         }
1266 #endif
1267
1268         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1269         dual_timestamp_get(&userspace_timestamp);
1270
1271         /* Determine if this is a reexecution or normal bootup. We do
1272          * the full command line parsing much later, so let's just
1273          * have a quick peek here. */
1274         if (strv_find(argv+1, "--deserialize"))
1275                 skip_setup = true;
1276
1277         /* If we have switched root, do all the special setup
1278          * things */
1279         if (strv_find(argv+1, "--switched-root"))
1280                 skip_setup = false;
1281
1282         /* If we get started via the /sbin/init symlink then we are
1283            called 'init'. After a subsequent reexecution we are then
1284            called 'systemd'. That is confusing, hence let's call us
1285            systemd right-away. */
1286         program_invocation_short_name = systemd;
1287         prctl(PR_SET_NAME, systemd);
1288
1289         saved_argv = argv;
1290         saved_argc = argc;
1291
1292         log_show_color(isatty(STDERR_FILENO) > 0);
1293
1294         if (getpid() == 1 && detect_container(NULL) <= 0) {
1295
1296                 /* Running outside of a container as PID 1 */
1297                 arg_running_as = SYSTEMD_SYSTEM;
1298                 make_null_stdio();
1299                 log_set_target(LOG_TARGET_KMSG);
1300                 log_open();
1301
1302                 if (in_initrd()) {
1303                         char *rd_timestamp = NULL;
1304
1305                         initrd_timestamp = userspace_timestamp;
1306                         asprintf(&rd_timestamp, "%llu %llu",
1307                                  (unsigned long long) initrd_timestamp.realtime,
1308                                  (unsigned long long) initrd_timestamp.monotonic);
1309                         if (rd_timestamp) {
1310                                 setenv("RD_TIMESTAMP", rd_timestamp, 1);
1311                                 free(rd_timestamp);
1312                         }
1313                 }
1314
1315                 if (!skip_setup) {
1316                         mount_setup_early();
1317                         if (selinux_setup(&loaded_policy) < 0)
1318                                 goto finish;
1319                         if (ima_setup() < 0)
1320                                 goto finish;
1321                         if (smack_setup() < 0)
1322                                 goto finish;
1323                 }
1324
1325                 if (label_init(NULL) < 0)
1326                         goto finish;
1327
1328                 if (!skip_setup) {
1329                         if (hwclock_is_localtime() > 0) {
1330                                 int min;
1331
1332                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1333                                 r = hwclock_set_timezone(&min);
1334                                 if (r < 0)
1335                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1336                                 else
1337                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1338                         } else if (!in_initrd()) {
1339                                 /*
1340                                  * Do dummy first-time call to seal the kernel's time warp magic
1341                                  *
1342                                  * Do not call this from inside the initrd. The initrd might not
1343                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1344                                  * that way. In such case, we need to delay the time-warp or the sealing
1345                                  * until we reach the real system.
1346                                  */
1347                                 hwclock_reset_timezone();
1348
1349                                 /* Tell the kernel our timezone */
1350                                 r = hwclock_set_timezone(NULL);
1351                                 if (r < 0)
1352                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1353                         }
1354                 }
1355
1356                 /* Set the default for later on, but don't actually
1357                  * open the logs like this for now. Note that if we
1358                  * are transitioning from the initrd there might still
1359                  * be journal fd open, and we shouldn't attempt
1360                  * opening that before we parsed /proc/cmdline which
1361                  * might redirect output elsewhere. */
1362                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1363
1364         } else if (getpid() == 1) {
1365                 /* Running inside a container, as PID 1 */
1366                 arg_running_as = SYSTEMD_SYSTEM;
1367                 log_set_target(LOG_TARGET_CONSOLE);
1368                 log_open();
1369
1370                 /* For the later on, see above... */
1371                 log_set_target(LOG_TARGET_JOURNAL);
1372
1373                 /* clear the kernel timestamp,
1374                  * because we are in a container */
1375                 kernel_timestamp.monotonic = 0ULL;
1376                 kernel_timestamp.realtime = 0ULL;
1377
1378         } else {
1379                 /* Running as user instance */
1380                 arg_running_as = SYSTEMD_USER;
1381                 log_set_target(LOG_TARGET_AUTO);
1382                 log_open();
1383
1384                 /* clear the kernel timestamp,
1385                  * because we are not PID 1 */
1386                 kernel_timestamp.monotonic = 0ULL;
1387                 kernel_timestamp.realtime = 0ULL;
1388         }
1389
1390         /* Initialize default unit */
1391         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1392         if (r < 0) {
1393                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1394                 goto finish;
1395         }
1396
1397         r = initialize_join_controllers();
1398         if (r < 0)
1399                 goto finish;
1400
1401         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1402          * /proc/$PID/fd is available. */
1403         if (getpid() == 1) {
1404                 r = mount_setup(loaded_policy);
1405                 if (r < 0)
1406                         goto finish;
1407         }
1408
1409         /* Reset all signal handlers. */
1410         assert_se(reset_all_signal_handlers() == 0);
1411
1412         ignore_signals(SIGNALS_IGNORE, -1);
1413
1414         if (parse_config_file() < 0)
1415                 goto finish;
1416
1417         if (arg_running_as == SYSTEMD_SYSTEM)
1418                 if (parse_proc_cmdline() < 0)
1419                         goto finish;
1420
1421         log_parse_environment();
1422
1423         if (parse_argv(argc, argv) < 0)
1424                 goto finish;
1425
1426         if (arg_action == ACTION_TEST &&
1427             geteuid() == 0) {
1428                 log_error("Don't run test mode as root.");
1429                 goto finish;
1430         }
1431
1432         if (arg_running_as == SYSTEMD_USER &&
1433             arg_action == ACTION_RUN &&
1434             sd_booted() <= 0) {
1435                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1436                 goto finish;
1437         }
1438
1439         if (arg_running_as == SYSTEMD_SYSTEM &&
1440             arg_action == ACTION_RUN &&
1441             running_in_chroot() > 0) {
1442                 log_error("Cannot be run in a chroot() environment.");
1443                 goto finish;
1444         }
1445
1446         if (arg_action == ACTION_HELP) {
1447                 retval = help();
1448                 goto finish;
1449         } else if (arg_action == ACTION_VERSION) {
1450                 retval = version();
1451                 goto finish;
1452         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1453                 unit_dump_config_items(stdout);
1454                 retval = EXIT_SUCCESS;
1455                 goto finish;
1456         } else if (arg_action == ACTION_DONE) {
1457                 retval = EXIT_SUCCESS;
1458                 goto finish;
1459         }
1460
1461         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1462
1463         /* Close logging fds, in order not to confuse fdset below */
1464         log_close();
1465
1466         /* Remember open file descriptors for later deserialization */
1467         r = fdset_new_fill(&fds);
1468         if (r < 0) {
1469                 log_error("Failed to allocate fd set: %s", strerror(-r));
1470                 goto finish;
1471         } else
1472                 fdset_cloexec(fds, true);
1473
1474         if (serialization)
1475                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1476
1477         /* Set up PATH unless it is already set */
1478         setenv("PATH",
1479 #ifdef HAVE_SPLIT_USR
1480                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1481 #else
1482                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin",
1483 #endif
1484                arg_running_as == SYSTEMD_SYSTEM);
1485
1486         if (arg_running_as == SYSTEMD_SYSTEM) {
1487                 /* Parse the data passed to us. We leave these
1488                  * variables set, but the manager later on will not
1489                  * pass them on to our children. */
1490                 if (!in_initrd())
1491                         parse_initrd_timestamp(&initrd_timestamp);
1492
1493                 /* Unset some environment variables passed in from the
1494                  * kernel that don't really make sense for us. */
1495                 unsetenv("HOME");
1496                 unsetenv("TERM");
1497
1498                 /* When we are invoked by a shell, these might be set,
1499                  * but make little sense to pass on */
1500                 unsetenv("PWD");
1501                 unsetenv("SHLVL");
1502                 unsetenv("_");
1503
1504                 /* When we are invoked by a chroot-like tool such as
1505                  * nspawn, these might be set, but make little sense
1506                  * to pass on */
1507                 unsetenv("USER");
1508                 unsetenv("LOGNAME");
1509
1510                 /* We suppress the socket activation env vars, as
1511                  * we'll try to match *any* open fd to units if
1512                  * possible. */
1513                 unsetenv("LISTEN_FDS");
1514                 unsetenv("LISTEN_PID");
1515
1516                 /* All other variables are left as is, so that clients
1517                  * can still read them via /proc/1/environ */
1518         }
1519
1520         /* Move out of the way, so that we won't block unmounts */
1521         assert_se(chdir("/")  == 0);
1522
1523         if (arg_running_as == SYSTEMD_SYSTEM) {
1524                 /* Become a session leader if we aren't one yet. */
1525                 setsid();
1526
1527                 /* Disable the umask logic */
1528                 umask(0);
1529         }
1530
1531         /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */
1532         dbus_connection_set_change_sigpipe(FALSE);
1533
1534         /* Reset the console, but only if this is really init and we
1535          * are freshly booted */
1536         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1537                 console_setup(getpid() == 1 && !skip_setup);
1538
1539         /* Open the logging devices, if possible and necessary */
1540         log_open();
1541
1542         /* Make sure we leave a core dump without panicking the
1543          * kernel. */
1544         if (getpid() == 1) {
1545                 install_crash_handler();
1546
1547                 r = mount_cgroup_controllers(arg_join_controllers);
1548                 if (r < 0)
1549                         goto finish;
1550         }
1551
1552         if (arg_running_as == SYSTEMD_SYSTEM) {
1553                 const char *virtualization = NULL;
1554
1555                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1556
1557                 detect_virtualization(&virtualization);
1558                 if (virtualization)
1559                         log_info("Detected virtualization '%s'.", virtualization);
1560
1561                 if (in_initrd())
1562                         log_info("Running in initial RAM disk.");
1563
1564         } else
1565                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1566
1567         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1568                 locale_setup();
1569
1570                 if (arg_show_status || plymouth_running())
1571                         status_welcome();
1572
1573 #ifdef HAVE_KMOD
1574                 kmod_setup();
1575 #endif
1576                 hostname_setup();
1577                 machine_id_setup();
1578                 loopback_setup();
1579
1580                 test_mtab();
1581                 test_usr();
1582                 test_cgroups();
1583         }
1584
1585         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1586                 watchdog_set_timeout(&arg_runtime_watchdog);
1587
1588         if (arg_timer_slack_nsec != (nsec_t) -1)
1589                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1590                         log_error("Failed to adjust timer slack: %m");
1591
1592         if (arg_capability_bounding_set_drop) {
1593                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1594                 if (r < 0) {
1595                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1596                         goto finish;
1597                 }
1598                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1599                 if (r < 0) {
1600                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1601                         goto finish;
1602                 }
1603         }
1604
1605         if (arg_running_as == SYSTEMD_USER) {
1606                 /* Become reaper of our children */
1607                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1608                         log_warning("Failed to make us a subreaper: %m");
1609                         if (errno == EINVAL)
1610                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1611                 }
1612         }
1613
1614         if (arg_running_as == SYSTEMD_SYSTEM)
1615                 bump_rlimit_nofile(&saved_rlimit_nofile);
1616
1617         r = manager_new(arg_running_as, !!serialization, &m);
1618         if (r < 0) {
1619                 log_error("Failed to allocate manager object: %s", strerror(-r));
1620                 goto finish;
1621         }
1622
1623         m->confirm_spawn = arg_confirm_spawn;
1624         m->default_std_output = arg_default_std_output;
1625         m->default_std_error = arg_default_std_error;
1626         m->runtime_watchdog = arg_runtime_watchdog;
1627         m->shutdown_watchdog = arg_shutdown_watchdog;
1628         m->userspace_timestamp = userspace_timestamp;
1629         m->kernel_timestamp = kernel_timestamp;
1630         m->initrd_timestamp = initrd_timestamp;
1631
1632         manager_set_default_rlimits(m, arg_default_rlimit);
1633
1634         if (arg_default_environment)
1635                 manager_set_default_environment(m, arg_default_environment);
1636
1637         manager_set_show_status(m, arg_show_status);
1638
1639         /* Remember whether we should queue the default job */
1640         queue_default_job = !serialization || arg_switched_root;
1641
1642         before_startup = now(CLOCK_MONOTONIC);
1643
1644         r = manager_startup(m, serialization, fds);
1645         if (r < 0)
1646                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1647
1648         /* This will close all file descriptors that were opened, but
1649          * not claimed by any unit. */
1650         fdset_free(fds);
1651
1652         if (serialization) {
1653                 fclose(serialization);
1654                 serialization = NULL;
1655         }
1656
1657         if (queue_default_job) {
1658                 DBusError error;
1659                 Unit *target = NULL;
1660                 Job *default_unit_job;
1661
1662                 dbus_error_init(&error);
1663
1664                 log_debug("Activating default unit: %s", arg_default_unit);
1665
1666                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1667                 if (r < 0) {
1668                         log_error("Failed to load default target: %s", bus_error(&error, r));
1669                         dbus_error_free(&error);
1670                 } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1671                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1672                 else if (target->load_state == UNIT_MASKED)
1673                         log_error("Default target masked.");
1674
1675                 if (!target || target->load_state != UNIT_LOADED) {
1676                         log_info("Trying to load rescue target...");
1677
1678                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1679                         if (r < 0) {
1680                                 log_error("Failed to load rescue target: %s", bus_error(&error, r));
1681                                 dbus_error_free(&error);
1682                                 goto finish;
1683                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1684                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1685                                 goto finish;
1686                         } else if (target->load_state == UNIT_MASKED) {
1687                                 log_error("Rescue target masked.");
1688                                 goto finish;
1689                         }
1690                 }
1691
1692                 assert(target->load_state == UNIT_LOADED);
1693
1694                 if (arg_action == ACTION_TEST) {
1695                         printf("-> By units:\n");
1696                         manager_dump_units(m, stdout, "\t");
1697                 }
1698
1699                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1700                 if (r == -EPERM) {
1701                         log_debug("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
1702                         dbus_error_free(&error);
1703
1704                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1705                         if (r < 0) {
1706                                 log_error("Failed to start default target: %s", bus_error(&error, r));
1707                                 dbus_error_free(&error);
1708                                 goto finish;
1709                         }
1710                 } else if (r < 0) {
1711                         log_error("Failed to isolate default target: %s", bus_error(&error, r));
1712                         dbus_error_free(&error);
1713                         goto finish;
1714                 }
1715
1716                 m->default_unit_job_id = default_unit_job->id;
1717
1718                 after_startup = now(CLOCK_MONOTONIC);
1719                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1720                          "Loaded units and determined initial transaction in %s.",
1721                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1722
1723                 if (arg_action == ACTION_TEST) {
1724                         printf("-> By jobs:\n");
1725                         manager_dump_jobs(m, stdout, "\t");
1726                         retval = EXIT_SUCCESS;
1727                         goto finish;
1728                 }
1729         }
1730
1731         for (;;) {
1732                 r = manager_loop(m);
1733                 if (r < 0) {
1734                         log_error("Failed to run mainloop: %s", strerror(-r));
1735                         goto finish;
1736                 }
1737
1738                 switch (m->exit_code) {
1739
1740                 case MANAGER_EXIT:
1741                         retval = EXIT_SUCCESS;
1742                         log_debug("Exit.");
1743                         goto finish;
1744
1745                 case MANAGER_RELOAD:
1746                         log_info("Reloading.");
1747                         r = manager_reload(m);
1748                         if (r < 0)
1749                                 log_error("Failed to reload: %s", strerror(-r));
1750                         break;
1751
1752                 case MANAGER_REEXECUTE:
1753
1754                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1755                                 goto finish;
1756
1757                         reexecute = true;
1758                         log_notice("Reexecuting.");
1759                         goto finish;
1760
1761                 case MANAGER_SWITCH_ROOT:
1762                         /* Steal the switch root parameters */
1763                         switch_root_dir = m->switch_root;
1764                         switch_root_init = m->switch_root_init;
1765                         m->switch_root = m->switch_root_init = NULL;
1766
1767                         if (!switch_root_init)
1768                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1769                                         goto finish;
1770
1771                         reexecute = true;
1772                         log_notice("Switching root.");
1773                         goto finish;
1774
1775                 case MANAGER_REBOOT:
1776                 case MANAGER_POWEROFF:
1777                 case MANAGER_HALT:
1778                 case MANAGER_KEXEC: {
1779                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1780                                 [MANAGER_REBOOT] = "reboot",
1781                                 [MANAGER_POWEROFF] = "poweroff",
1782                                 [MANAGER_HALT] = "halt",
1783                                 [MANAGER_KEXEC] = "kexec"
1784                         };
1785
1786                         assert_se(shutdown_verb = table[m->exit_code]);
1787                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1788
1789                         log_notice("Shutting down.");
1790                         goto finish;
1791                 }
1792
1793                 default:
1794                         assert_not_reached("Unknown exit code.");
1795                 }
1796         }
1797
1798 finish:
1799         if (m)
1800                 manager_free(m);
1801
1802         for (j = 0; j < RLIMIT_NLIMITS; j++)
1803                 free(arg_default_rlimit[j]);
1804
1805         free(arg_default_unit);
1806         free_join_controllers();
1807
1808         dbus_shutdown();
1809         label_finish();
1810
1811         if (reexecute) {
1812                 const char **args;
1813                 unsigned i, args_size;
1814
1815                 /* Close and disarm the watchdog, so that the new
1816                  * instance can reinitialize it, but doesn't get
1817                  * rebooted while we do that */
1818                 watchdog_close(true);
1819
1820                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1821                  * that the new systemd can pass the kernel default to
1822                  * its child processes */
1823                 if (saved_rlimit_nofile.rlim_cur > 0)
1824                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1825
1826                 if (switch_root_dir) {
1827                         /* Kill all remaining processes from the
1828                          * initrd, but don't wait for them, so that we
1829                          * can handle the SIGCHLD for them after
1830                          * deserializing. */
1831                         broadcast_signal(SIGTERM, false);
1832
1833                         /* And switch root */
1834                         r = switch_root(switch_root_dir);
1835                         if (r < 0)
1836                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1837                 }
1838
1839                 args_size = MAX(6, argc+1);
1840                 args = newa(const char*, args_size);
1841
1842                 if (!switch_root_init) {
1843                         char sfd[16];
1844
1845                         /* First try to spawn ourselves with the right
1846                          * path, and with full serialization. We do
1847                          * this only if the user didn't specify an
1848                          * explicit init to spawn. */
1849
1850                         assert(serialization);
1851                         assert(fds);
1852
1853                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1854                         char_array_0(sfd);
1855
1856                         i = 0;
1857                         args[i++] = SYSTEMD_BINARY_PATH;
1858                         if (switch_root_dir)
1859                                 args[i++] = "--switched-root";
1860                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1861                         args[i++] = "--deserialize";
1862                         args[i++] = sfd;
1863                         args[i++] = NULL;
1864
1865                         assert(i <= args_size);
1866                         execv(args[0], (char* const*) args);
1867                 }
1868
1869                 /* Try the fallback, if there is any, without any
1870                  * serialization. We pass the original argv[] and
1871                  * envp[]. (Well, modulo the ordering changes due to
1872                  * getopt() in argv[], and some cleanups in envp[],
1873                  * but let's hope that doesn't matter.) */
1874
1875                 if (serialization) {
1876                         fclose(serialization);
1877                         serialization = NULL;
1878                 }
1879
1880                 if (fds) {
1881                         fdset_free(fds);
1882                         fds = NULL;
1883                 }
1884
1885                 /* Reopen the console */
1886                 make_console_stdio();
1887
1888                 for (j = 1, i = 1; j < argc; j++)
1889                         args[i++] = argv[j];
1890                 args[i++] = NULL;
1891                 assert(i <= args_size);
1892
1893                 if (switch_root_init) {
1894                         args[0] = switch_root_init;
1895                         execv(args[0], (char* const*) args);
1896                         log_warning("Failed to execute configured init, trying fallback: %m");
1897                 }
1898
1899                 args[0] = "/sbin/init";
1900                 execv(args[0], (char* const*) args);
1901
1902                 if (errno == ENOENT) {
1903                         log_warning("No /sbin/init, trying fallback");
1904
1905                         args[0] = "/bin/sh";
1906                         args[1] = NULL;
1907                         execv(args[0], (char* const*) args);
1908                         log_error("Failed to execute /bin/sh, giving up: %m");
1909                 } else
1910                         log_warning("Failed to execute /sbin/init, giving up: %m");
1911         }
1912
1913         if (serialization)
1914                 fclose(serialization);
1915
1916         if (fds)
1917                 fdset_free(fds);
1918
1919         if (shutdown_verb) {
1920                 const char * command_line[] = {
1921                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1922                         shutdown_verb,
1923                         NULL
1924                 };
1925                 char **env_block;
1926
1927                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1928                         char e[32];
1929
1930                         /* If we reboot let's set the shutdown
1931                          * watchdog and tell the shutdown binary to
1932                          * repeatedly ping it */
1933                         watchdog_set_timeout(&arg_shutdown_watchdog);
1934                         watchdog_close(false);
1935
1936                         /* Tell the binary how often to ping */
1937                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1938                         char_array_0(e);
1939
1940                         env_block = strv_append(environ, e);
1941                 } else {
1942                         env_block = strv_copy(environ);
1943                         watchdog_close(true);
1944                 }
1945
1946                 /* Avoid the creation of new processes forked by the
1947                  * kernel; at this point, we will not listen to the
1948                  * signals anyway */
1949                 if (detect_container(NULL) <= 0)
1950                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1951
1952                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1953                 free(env_block);
1954                 log_error("Failed to execute shutdown binary, freezing: %m");
1955         }
1956
1957         if (getpid() == 1)
1958                 freeze();
1959
1960         return retval;
1961 }