chiark / gitweb /
seccomp: fix build again if libseccomp is missing
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <stdio.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/stat.h>
28 #include <getopt.h>
29 #include <signal.h>
30 #include <sys/wait.h>
31 #include <fcntl.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
37 #endif
38
39 #include "sd-daemon.h"
40 #include "sd-messages.h"
41 #include "sd-bus.h"
42 #include "manager.h"
43 #include "log.h"
44 #include "load-fragment.h"
45 #include "fdset.h"
46 #include "special.h"
47 #include "conf-parser.h"
48 #include "missing.h"
49 #include "label.h"
50 #include "build.h"
51 #include "strv.h"
52 #include "def.h"
53 #include "virt.h"
54 #include "watchdog.h"
55 #include "path-util.h"
56 #include "switch-root.h"
57 #include "capability.h"
58 #include "killall.h"
59 #include "env-util.h"
60 #include "hwclock.h"
61 #include "fileio.h"
62 #include "dbus-manager.h"
63 #include "bus-error.h"
64 #include "bus-util.h"
65
66 #include "mount-setup.h"
67 #include "loopback-setup.h"
68 #include "hostname-setup.h"
69 #include "machine-id-setup.h"
70 #include "selinux-setup.h"
71 #include "ima-setup.h"
72 #include "smack-setup.h"
73 #ifdef HAVE_KMOD
74 #include "kmod-setup.h"
75 #endif
76
77 static enum {
78         ACTION_RUN,
79         ACTION_HELP,
80         ACTION_VERSION,
81         ACTION_TEST,
82         ACTION_DUMP_CONFIGURATION_ITEMS,
83         ACTION_DONE
84 } arg_action = ACTION_RUN;
85 static char *arg_default_unit = NULL;
86 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
87 static bool arg_dump_core = true;
88 static bool arg_crash_shell = false;
89 static int arg_crash_chvt = -1;
90 static bool arg_confirm_spawn = false;
91 static ShowStatus arg_show_status = SHOW_STATUS_UNSET;
92 static bool arg_switched_root = false;
93 static char ***arg_join_controllers = NULL;
94 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
95 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
96 static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
97 static usec_t arg_default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
98 static usec_t arg_default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
99 static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
100 static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
101 static usec_t arg_runtime_watchdog = 0;
102 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
103 static char **arg_default_environment = NULL;
104 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
105 static uint64_t arg_capability_bounding_set_drop = 0;
106 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
107 static Set* arg_syscall_archs = NULL;
108 static FILE* arg_serialization = NULL;
109
/* Signal handler that intentionally does nothing; crash() installs it for
 * SIGCHLD while waiting for the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
111
112 noreturn static void crash(int sig) {
113
114         if (getpid() != 1)
115                 /* Pass this on immediately, if this is not PID 1 */
116                 raise(sig);
117         else if (!arg_dump_core)
118                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
119         else {
120                 struct sigaction sa = {
121                         .sa_handler = nop_handler,
122                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
123                 };
124                 pid_t pid;
125
126                 /* We want to wait for the core process, hence let's enable SIGCHLD */
127                 sigaction(SIGCHLD, &sa, NULL);
128
129                 pid = fork();
130                 if (pid < 0)
131                         log_error("Caught <%s>, cannot fork for core dump: %m", signal_to_string(sig));
132
133                 else if (pid == 0) {
134                         struct rlimit rl = {};
135
136                         /* Enable default signal handler for core dump */
137                         zero(sa);
138                         sa.sa_handler = SIG_DFL;
139                         sigaction(sig, &sa, NULL);
140
141                         /* Don't limit the core dump size */
142                         rl.rlim_cur = RLIM_INFINITY;
143                         rl.rlim_max = RLIM_INFINITY;
144                         setrlimit(RLIMIT_CORE, &rl);
145
146                         /* Just to be sure... */
147                         chdir("/");
148
149                         /* Raise the signal again */
150                         raise(sig);
151
152                         assert_not_reached("We shouldn't be here...");
153                         _exit(1);
154
155                 } else {
156                         siginfo_t status;
157                         int r;
158
159                         /* Order things nicely. */
160                         r = wait_for_terminate(pid, &status);
161                         if (r < 0)
162                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
163                         else if (status.si_code != CLD_DUMPED)
164                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
165                         else
166                                 log_error("Caught <%s>, dumped core as pid "PID_FMT".", signal_to_string(sig), pid);
167                 }
168         }
169
170         if (arg_crash_chvt)
171                 chvt(arg_crash_chvt);
172
173         if (arg_crash_shell) {
174                 struct sigaction sa = {
175                         .sa_handler = SIG_IGN,
176                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
177                 };
178                 pid_t pid;
179
180                 log_info("Executing crash shell in 10s...");
181                 sleep(10);
182
183                 /* Let the kernel reap children for us */
184                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
185
186                 pid = fork();
187                 if (pid < 0)
188                         log_error("Failed to fork off crash shell: %m");
189                 else if (pid == 0) {
190                         make_console_stdio();
191                         execl("/bin/sh", "/bin/sh", NULL);
192
193                         log_error("execl() failed: %m");
194                         _exit(1);
195                 }
196
197                 log_info("Successfully spawned crash shell as pid "PID_FMT".", pid);
198         }
199
200         log_info("Freezing execution.");
201         freeze();
202 }
203
204 static void install_crash_handler(void) {
205         struct sigaction sa = {
206                 .sa_handler = crash,
207                 .sa_flags = SA_NODEFER,
208         };
209
210         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
211 }
212
213 static int console_setup(bool do_reset) {
214         int tty_fd, r;
215
216         /* If we are init, we connect stdin/stdout/stderr to /dev/null
217          * and make sure we don't have a controlling tty. */
218
219         release_terminal();
220
221         if (!do_reset)
222                 return 0;
223
224         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
225         if (tty_fd < 0) {
226                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
227                 return -tty_fd;
228         }
229
230         /* We don't want to force text mode.
231          * plymouth may be showing pictures already from initrd. */
232         r = reset_terminal_fd(tty_fd, false);
233         if (r < 0)
234                 log_error("Failed to reset /dev/console: %s", strerror(-r));
235
236         close_nointr_nofail(tty_fd);
237         return r;
238 }
239
240 static int set_default_unit(const char *u) {
241         char *c;
242
243         assert(u);
244
245         c = strdup(u);
246         if (!c)
247                 return -ENOMEM;
248
249         free(arg_default_unit);
250         arg_default_unit = c;
251
252         return 0;
253 }
254
255 static int parse_proc_cmdline_word(const char *word) {
256
257         static const char * const rlmap[] = {
258                 "emergency", SPECIAL_EMERGENCY_TARGET,
259                 "-b",        SPECIAL_EMERGENCY_TARGET,
260                 "single",    SPECIAL_RESCUE_TARGET,
261                 "-s",        SPECIAL_RESCUE_TARGET,
262                 "s",         SPECIAL_RESCUE_TARGET,
263                 "S",         SPECIAL_RESCUE_TARGET,
264                 "1",         SPECIAL_RESCUE_TARGET,
265                 "2",         SPECIAL_RUNLEVEL2_TARGET,
266                 "3",         SPECIAL_RUNLEVEL3_TARGET,
267                 "4",         SPECIAL_RUNLEVEL4_TARGET,
268                 "5",         SPECIAL_RUNLEVEL5_TARGET,
269         };
270
271         assert(word);
272
273         if (startswith(word, "systemd.unit=")) {
274
275                 if (!in_initrd())
276                         return set_default_unit(word + 13);
277
278         } else if (startswith(word, "rd.systemd.unit=")) {
279
280                 if (in_initrd())
281                         return set_default_unit(word + 16);
282
283         } else if (startswith(word, "systemd.log_target=")) {
284
285                 if (log_set_target_from_string(word + 19) < 0)
286                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
287
288         } else if (startswith(word, "systemd.log_level=")) {
289
290                 if (log_set_max_level_from_string(word + 18) < 0)
291                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
292
293         } else if (startswith(word, "systemd.log_color=")) {
294
295                 if (log_show_color_from_string(word + 18) < 0)
296                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
297
298         } else if (startswith(word, "systemd.log_location=")) {
299
300                 if (log_show_location_from_string(word + 21) < 0)
301                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
302
303         } else if (startswith(word, "systemd.dump_core=")) {
304                 int r;
305
306                 if ((r = parse_boolean(word + 18)) < 0)
307                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
308                 else
309                         arg_dump_core = r;
310
311         } else if (startswith(word, "systemd.crash_shell=")) {
312                 int r;
313
314                 if ((r = parse_boolean(word + 20)) < 0)
315                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
316                 else
317                         arg_crash_shell = r;
318
319         } else if (startswith(word, "systemd.confirm_spawn=")) {
320                 int r;
321
322                 if ((r = parse_boolean(word + 22)) < 0)
323                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
324                 else
325                         arg_confirm_spawn = r;
326
327         } else if (startswith(word, "systemd.crash_chvt=")) {
328                 int k;
329
330                 if (safe_atoi(word + 19, &k) < 0)
331                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
332                 else
333                         arg_crash_chvt = k;
334
335         } else if (startswith(word, "systemd.show_status=")) {
336                 int r;
337
338                 r = parse_show_status(word + 20, &arg_show_status);
339                 if (r < 0)
340                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
341         } else if (startswith(word, "systemd.default_standard_output=")) {
342                 int r;
343
344                 if ((r = exec_output_from_string(word + 32)) < 0)
345                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
346                 else
347                         arg_default_std_output = r;
348         } else if (startswith(word, "systemd.default_standard_error=")) {
349                 int r;
350
351                 if ((r = exec_output_from_string(word + 31)) < 0)
352                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
353                 else
354                         arg_default_std_error = r;
355         } else if (startswith(word, "systemd.setenv=")) {
356                 _cleanup_free_ char *cenv = NULL;
357
358                 cenv = strdup(word + 15);
359                 if (!cenv)
360                         return -ENOMEM;
361
362                 if (env_assignment_is_valid(cenv)) {
363                         char **env;
364
365                         env = strv_env_set(arg_default_environment, cenv);
366                         if (env)
367                                 arg_default_environment = env;
368                         else
369                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
370                 } else
371                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
372
373         } else if (startswith(word, "systemd.") ||
374                    (in_initrd() && startswith(word, "rd.systemd."))) {
375
376                 const char *c;
377
378                 /* Ignore systemd.journald.xyz and friends */
379                 c = word;
380                 if (startswith(c, "rd."))
381                         c += 3;
382                 if (startswith(c, "systemd."))
383                         c += 8;
384                 if (c[strcspn(c, ".=")] != '.')  {
385
386                         log_warning("Unknown kernel switch %s. Ignoring.", word);
387
388                         log_info("Supported kernel switches:\n"
389                                  "systemd.unit=UNIT                        Default unit to start\n"
390                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
391                                  "systemd.dump_core=0|1                    Dump core on crash\n"
392                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
393                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
394                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
395                                  "systemd.show_status=0|1|auto             Show status updates on the console during bootup\n"
396                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
397                                  "                                         Log target\n"
398                                  "systemd.log_level=LEVEL                  Log level\n"
399                                  "systemd.log_color=0|1                    Highlight important log messages\n"
400                                  "systemd.log_location=0|1                 Include code location in log messages\n"
401                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
402                                  "                                         Set default log output for services\n"
403                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
404                                  "                                         Set default log error output for services\n"
405                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
406                 }
407
408         } else if (streq(word, "quiet")) {
409                 if (arg_show_status == SHOW_STATUS_UNSET)
410                         arg_show_status = SHOW_STATUS_AUTO;
411         } else if (streq(word, "debug")) {
412                 /* Log to kmsg, the journal socket will fill up before the
413                  * journal is started and tools running during that time
414                  * will block with every log message for for 60 seconds,
415                  * before they give up. */
416                 log_set_max_level(LOG_DEBUG);
417                 log_set_target(detect_container(NULL) > 0 ? LOG_TARGET_CONSOLE : LOG_TARGET_KMSG);
418         } else if (!in_initrd()) {
419                 unsigned i;
420
421                 /* SysV compatibility */
422                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
423                         if (streq(word, rlmap[i]))
424                                 return set_default_unit(rlmap[i+1]);
425         }
426
427         return 0;
428 }
429
/* Generate a configuration-parser callback called "name" that feeds the
 * right-hand side of a setting to func(rvalue). A negative return value
 * from func is reported through log_syntax() using descr as the
 * human-readable name of the setting; the callback itself always returns
 * 0, so an invalid value never aborts parsing. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        unsigned section_line,                        \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

/* Callbacks for the logging-related settings of the [Manager] section. */
DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
461
462 static int config_parse_cpu_affinity2(const char *unit,
463                                       const char *filename,
464                                       unsigned line,
465                                       const char *section,
466                                       unsigned section_line,
467                                       const char *lvalue,
468                                       int ltype,
469                                       const char *rvalue,
470                                       void *data,
471                                       void *userdata) {
472
473         char *w;
474         size_t l;
475         char *state;
476         cpu_set_t *c = NULL;
477         unsigned ncpus = 0;
478
479         assert(filename);
480         assert(lvalue);
481         assert(rvalue);
482
483         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
484                 char *t;
485                 int r;
486                 unsigned cpu;
487
488                 if (!(t = strndup(w, l)))
489                         return log_oom();
490
491                 r = safe_atou(t, &cpu);
492                 free(t);
493
494                 if (!c)
495                         if (!(c = cpu_set_malloc(&ncpus)))
496                                 return log_oom();
497
498                 if (r < 0 || cpu >= ncpus) {
499                         log_syntax(unit, LOG_ERR, filename, line, -r,
500                                    "Failed to parse CPU affinity '%s'", rvalue);
501                         CPU_FREE(c);
502                         return -EBADMSG;
503                 }
504
505                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
506         }
507
508         if (c) {
509                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
510                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
511
512                 CPU_FREE(c);
513         }
514
515         return 0;
516 }
517
/* Free a NULL-terminated array of string vectors: every element is
 * released with strv_free(), then the array itself. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (l == NULL)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
529
530 static void free_join_controllers(void) {
531         strv_free_free(arg_join_controllers);
532         arg_join_controllers = NULL;
533 }
534
535 static int config_parse_join_controllers(const char *unit,
536                                          const char *filename,
537                                          unsigned line,
538                                          const char *section,
539                                          unsigned section_line,
540                                          const char *lvalue,
541                                          int ltype,
542                                          const char *rvalue,
543                                          void *data,
544                                          void *userdata) {
545
546         unsigned n = 0;
547         char *state, *w;
548         size_t length;
549
550         assert(filename);
551         assert(lvalue);
552         assert(rvalue);
553
554         free_join_controllers();
555
556         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
557                 char *s, **l;
558
559                 s = strndup(w, length);
560                 if (!s)
561                         return log_oom();
562
563                 l = strv_split(s, ",");
564                 free(s);
565
566                 strv_uniq(l);
567
568                 if (strv_length(l) <= 1) {
569                         strv_free(l);
570                         continue;
571                 }
572
573                 if (!arg_join_controllers) {
574                         arg_join_controllers = new(char**, 2);
575                         if (!arg_join_controllers) {
576                                 strv_free(l);
577                                 return log_oom();
578                         }
579
580                         arg_join_controllers[0] = l;
581                         arg_join_controllers[1] = NULL;
582
583                         n = 1;
584                 } else {
585                         char ***a;
586                         char ***t;
587
588                         t = new0(char**, n+2);
589                         if (!t) {
590                                 strv_free(l);
591                                 return log_oom();
592                         }
593
594                         n = 0;
595
596                         for (a = arg_join_controllers; *a; a++) {
597
598                                 if (strv_overlap(*a, l)) {
599                                         if (strv_extend_strv(&l, *a) < 0) {
600                                                 strv_free(l);
601                                                 strv_free_free(t);
602                                                 return log_oom();
603                                         }
604
605                                 } else {
606                                         char **c;
607
608                                         c = strv_copy(*a);
609                                         if (!c) {
610                                                 strv_free(l);
611                                                 strv_free_free(t);
612                                                 return log_oom();
613                                         }
614
615                                         t[n++] = c;
616                                 }
617                         }
618
619                         t[n++] = strv_uniq(l);
620
621                         strv_free_free(arg_join_controllers);
622                         arg_join_controllers = t;
623                 }
624         }
625
626         return 0;
627 }
628
629 static int parse_config_file(void) {
630
631         const ConfigTableItem items[] = {
632                 { "Manager", "LogLevel",                  config_parse_level2,           0, NULL                                   },
633                 { "Manager", "LogTarget",                 config_parse_target,           0, NULL                                   },
634                 { "Manager", "LogColor",                  config_parse_color,            0, NULL                                   },
635                 { "Manager", "LogLocation",               config_parse_location,         0, NULL                                   },
636                 { "Manager", "DumpCore",                  config_parse_bool,             0, &arg_dump_core                         },
637                 { "Manager", "CrashShell",                config_parse_bool,             0, &arg_crash_shell                       },
638                 { "Manager", "ShowStatus",                config_parse_show_status,      0, &arg_show_status                       },
639                 { "Manager", "CrashChVT",                 config_parse_int,              0, &arg_crash_chvt                        },
640                 { "Manager", "CPUAffinity",               config_parse_cpu_affinity2,    0, NULL                                   },
641                 { "Manager", "JoinControllers",           config_parse_join_controllers, 0, &arg_join_controllers                  },
642                 { "Manager", "RuntimeWatchdogSec",        config_parse_sec,              0, &arg_runtime_watchdog                  },
643                 { "Manager", "ShutdownWatchdogSec",       config_parse_sec,              0, &arg_shutdown_watchdog                 },
644                 { "Manager", "CapabilityBoundingSet",     config_parse_bounding_set,     0, &arg_capability_bounding_set_drop      },
645 #ifdef HAVE_SECCOMP
646                 { "Manager", "SystemCallArchitectures",   config_parse_syscall_archs,    0, &arg_syscall_archs                     },
647 #endif
648                 { "Manager", "TimerSlackNSec",            config_parse_nsec,             0, &arg_timer_slack_nsec                  },
649                 { "Manager", "DefaultStandardOutput",     config_parse_output,           0, &arg_default_std_output                },
650                 { "Manager", "DefaultStandardError",      config_parse_output,           0, &arg_default_std_error                 },
651                 { "Manager", "DefaultTimeoutStartSec",    config_parse_sec,              0, &arg_default_timeout_start_usec        },
652                 { "Manager", "DefaultTimeoutStopSec",     config_parse_sec,              0, &arg_default_timeout_stop_usec         },
653                 { "Manager", "DefaultRestartSec",         config_parse_sec,              0, &arg_default_restart_usec              },
654                 { "Manager", "DefaultStartLimitInterval", config_parse_sec,              0, &arg_default_start_limit_interval      },
655                 { "Manager", "DefaultStartLimitBurst",    config_parse_unsigned,         0, &arg_default_start_limit_burst         },
656                 { "Manager", "DefaultEnvironment",        config_parse_environ,          0, &arg_default_environment               },
657                 { "Manager", "DefaultLimitCPU",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CPU]        },
658                 { "Manager", "DefaultLimitFSIZE",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_FSIZE]      },
659                 { "Manager", "DefaultLimitDATA",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_DATA]       },
660                 { "Manager", "DefaultLimitSTACK",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_STACK]      },
661                 { "Manager", "DefaultLimitCORE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_CORE]       },
662                 { "Manager", "DefaultLimitRSS",           config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RSS]        },
663                 { "Manager", "DefaultLimitNOFILE",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NOFILE]     },
664                 { "Manager", "DefaultLimitAS",            config_parse_limit,            0, &arg_default_rlimit[RLIMIT_AS]         },
665                 { "Manager", "DefaultLimitNPROC",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NPROC]      },
666                 { "Manager", "DefaultLimitMEMLOCK",       config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MEMLOCK]    },
667                 { "Manager", "DefaultLimitLOCKS",         config_parse_limit,            0, &arg_default_rlimit[RLIMIT_LOCKS]      },
668                 { "Manager", "DefaultLimitSIGPENDING",    config_parse_limit,            0, &arg_default_rlimit[RLIMIT_SIGPENDING] },
669                 { "Manager", "DefaultLimitMSGQUEUE",      config_parse_limit,            0, &arg_default_rlimit[RLIMIT_MSGQUEUE]   },
670                 { "Manager", "DefaultLimitNICE",          config_parse_limit,            0, &arg_default_rlimit[RLIMIT_NICE]       },
671                 { "Manager", "DefaultLimitRTPRIO",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTPRIO]     },
672                 { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            0, &arg_default_rlimit[RLIMIT_RTTIME]     },
673                 {}
674         };
675
676         _cleanup_fclose_ FILE *f;
677         const char *fn;
678         int r;
679
680         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
681         f = fopen(fn, "re");
682         if (!f) {
683                 if (errno == ENOENT)
684                         return 0;
685
686                 log_warning("Failed to open configuration file '%s': %m", fn);
687                 return 0;
688         }
689
690         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
691         if (r < 0)
692                 log_warning("Failed to parse configuration file: %s", strerror(-r));
693
694         return 0;
695 }
696
697 static int parse_proc_cmdline(void) {
698         _cleanup_free_ char *line = NULL;
699         char *w, *state;
700         size_t l;
701         int r;
702
703         r = proc_cmdline(&line);
704         if (r < 0)
705                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
706         if (r <= 0)
707                 return 0;
708
709         FOREACH_WORD_QUOTED(w, l, line, state) {
710                 _cleanup_free_ char *word;
711
712                 word = strndup(w, l);
713                 if (!word)
714                         return log_oom();
715
716                 r = parse_proc_cmdline_word(word);
717                 if (r < 0) {
718                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
719                         return r;
720                 }
721         }
722
723         return 0;
724 }
725
726 static int parse_argv(int argc, char *argv[]) {
727
728         enum {
729                 ARG_LOG_LEVEL = 0x100,
730                 ARG_LOG_TARGET,
731                 ARG_LOG_COLOR,
732                 ARG_LOG_LOCATION,
733                 ARG_UNIT,
734                 ARG_SYSTEM,
735                 ARG_USER,
736                 ARG_TEST,
737                 ARG_VERSION,
738                 ARG_DUMP_CONFIGURATION_ITEMS,
739                 ARG_DUMP_CORE,
740                 ARG_CRASH_SHELL,
741                 ARG_CONFIRM_SPAWN,
742                 ARG_SHOW_STATUS,
743                 ARG_DESERIALIZE,
744                 ARG_SWITCHED_ROOT,
745                 ARG_DEFAULT_STD_OUTPUT,
746                 ARG_DEFAULT_STD_ERROR
747         };
748
749         static const struct option options[] = {
750                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
751                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
752                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
753                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
754                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
755                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
756                 { "user",                     no_argument,       NULL, ARG_USER                     },
757                 { "test",                     no_argument,       NULL, ARG_TEST                     },
758                 { "help",                     no_argument,       NULL, 'h'                          },
759                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
760                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
761                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
762                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
763                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
764                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
765                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
766                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
767                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
768                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
769                 { NULL,                       0,                 NULL, 0                            }
770         };
771
772         int c, r;
773
774         assert(argc >= 1);
775         assert(argv);
776
777         if (getpid() == 1)
778                 opterr = 0;
779
780         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
781
782                 switch (c) {
783
784                 case ARG_LOG_LEVEL:
785                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
786                                 log_error("Failed to parse log level %s.", optarg);
787                                 return r;
788                         }
789
790                         break;
791
792                 case ARG_LOG_TARGET:
793
794                         if ((r = log_set_target_from_string(optarg)) < 0) {
795                                 log_error("Failed to parse log target %s.", optarg);
796                                 return r;
797                         }
798
799                         break;
800
801                 case ARG_LOG_COLOR:
802
803                         if (optarg) {
804                                 if ((r = log_show_color_from_string(optarg)) < 0) {
805                                         log_error("Failed to parse log color setting %s.", optarg);
806                                         return r;
807                                 }
808                         } else
809                                 log_show_color(true);
810
811                         break;
812
813                 case ARG_LOG_LOCATION:
814
815                         if (optarg) {
816                                 if ((r = log_show_location_from_string(optarg)) < 0) {
817                                         log_error("Failed to parse log location setting %s.", optarg);
818                                         return r;
819                                 }
820                         } else
821                                 log_show_location(true);
822
823                         break;
824
825                 case ARG_DEFAULT_STD_OUTPUT:
826
827                         if ((r = exec_output_from_string(optarg)) < 0) {
828                                 log_error("Failed to parse default standard output setting %s.", optarg);
829                                 return r;
830                         } else
831                                 arg_default_std_output = r;
832                         break;
833
834                 case ARG_DEFAULT_STD_ERROR:
835
836                         if ((r = exec_output_from_string(optarg)) < 0) {
837                                 log_error("Failed to parse default standard error output setting %s.", optarg);
838                                 return r;
839                         } else
840                                 arg_default_std_error = r;
841                         break;
842
843                 case ARG_UNIT:
844
845                         if ((r = set_default_unit(optarg)) < 0) {
846                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
847                                 return r;
848                         }
849
850                         break;
851
852                 case ARG_SYSTEM:
853                         arg_running_as = SYSTEMD_SYSTEM;
854                         break;
855
856                 case ARG_USER:
857                         arg_running_as = SYSTEMD_USER;
858                         break;
859
860                 case ARG_TEST:
861                         arg_action = ACTION_TEST;
862                         break;
863
864                 case ARG_VERSION:
865                         arg_action = ACTION_VERSION;
866                         break;
867
868                 case ARG_DUMP_CONFIGURATION_ITEMS:
869                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
870                         break;
871
872                 case ARG_DUMP_CORE:
873                         r = optarg ? parse_boolean(optarg) : 1;
874                         if (r < 0) {
875                                 log_error("Failed to parse dump core boolean %s.", optarg);
876                                 return r;
877                         }
878                         arg_dump_core = r;
879                         break;
880
881                 case ARG_CRASH_SHELL:
882                         r = optarg ? parse_boolean(optarg) : 1;
883                         if (r < 0) {
884                                 log_error("Failed to parse crash shell boolean %s.", optarg);
885                                 return r;
886                         }
887                         arg_crash_shell = r;
888                         break;
889
890                 case ARG_CONFIRM_SPAWN:
891                         r = optarg ? parse_boolean(optarg) : 1;
892                         if (r < 0) {
893                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
894                                 return r;
895                         }
896                         arg_confirm_spawn = r;
897                         break;
898
899                 case ARG_SHOW_STATUS:
900                         if (optarg) {
901                                 r = parse_show_status(optarg, &arg_show_status);
902                                 if (r < 0) {
903                                         log_error("Failed to parse show status boolean %s.", optarg);
904                                         return r;
905                                 }
906                         } else
907                                 arg_show_status = SHOW_STATUS_YES;
908                         break;
909
910                 case ARG_DESERIALIZE: {
911                         int fd;
912                         FILE *f;
913
914                         r = safe_atoi(optarg, &fd);
915                         if (r < 0 || fd < 0) {
916                                 log_error("Failed to parse deserialize option %s.", optarg);
917                                 return r < 0 ? r : -EINVAL;
918                         }
919
920                         fd_cloexec(fd, true);
921
922                         f = fdopen(fd, "r");
923                         if (!f) {
924                                 log_error("Failed to open serialization fd: %m");
925                                 return -errno;
926                         }
927
928                         if (arg_serialization)
929                                 fclose(arg_serialization);
930
931                         arg_serialization = f;
932
933                         break;
934                 }
935
936                 case ARG_SWITCHED_ROOT:
937                         arg_switched_root = true;
938                         break;
939
940                 case 'h':
941                         arg_action = ACTION_HELP;
942                         break;
943
944                 case 'D':
945                         log_set_max_level(LOG_DEBUG);
946                         break;
947
948                 case 'b':
949                 case 's':
950                 case 'z':
951                         /* Just to eat away the sysvinit kernel
952                          * cmdline args without getopt() error
953                          * messages that we'll parse in
954                          * parse_proc_cmdline_word() or ignore. */
955
956                 case '?':
957                 default:
958                         if (getpid() != 1) {
959                                 log_error("Unknown option code %c", c);
960                                 return -EINVAL;
961                         }
962
963                         break;
964                 }
965
966         if (optind < argc && getpid() != 1) {
967                 /* Hmm, when we aren't run as init system
968                  * let's complain about excess arguments */
969
970                 log_error("Excess arguments.");
971                 return -EINVAL;
972         }
973
974         if (detect_container(NULL) > 0) {
975                 char **a;
976
977                 /* All /proc/cmdline arguments the kernel didn't
978                  * understand it passed to us. We're not really
979                  * interested in that usually since /proc/cmdline is
980                  * more interesting and complete. With one exception:
981                  * if we are run in a container /proc/cmdline is not
982                  * relevant for the container, hence we rely on argv[]
983                  * instead. */
984
985                 for (a = argv; a < argv + argc; a++) {
986                         r = parse_proc_cmdline_word(*a);
987                         if (r < 0) {
988                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
989                                 return r;
990                         }
991                 }
992         }
993
994         return 0;
995 }
996
/* Writes the usage summary to stdout. Always returns 0. */
static int help(void) {

        /* Synopsis line, using the name we are currently known under */
        printf("%s [OPTIONS...]\n\n", program_invocation_short_name);

        /* Description and option list contain no substitutions, so
         * they are emitted verbatim */
        fputs("Starts up and maintains the system or user services.\n\n"
              "  -h --help                      Show this help\n"
              "     --test                      Determine startup sequence, dump it and exit\n"
              "     --dump-configuration-items  Dump understood unit configuration items\n"
              "     --unit=UNIT                 Set default unit\n"
              "     --system                    Run a system instance, even if PID != 1\n"
              "     --user                      Run a user instance\n"
              "     --dump-core[=0|1]           Dump core on crash\n"
              "     --crash-shell[=0|1]         Run shell on crash\n"
              "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
              "     --show-status[=0|1]         Show status updates on the console during bootup\n"
              "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
              "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
              "     --log-color[=0|1]           Highlight important log messages\n"
              "     --log-location[=0|1]        Include code location in log messages\n"
              "     --default-standard-output=  Set default standard output for services\n"
              "     --default-standard-error=   Set default standard error output for services\n",
              stdout);

        return 0;
}
1021
1022 static int version(void) {
1023         puts(PACKAGE_STRING);
1024         puts(SYSTEMD_FEATURES);
1025
1026         return 0;
1027 }
1028
1029 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1030         FILE *f = NULL;
1031         FDSet *fds = NULL;
1032         int r;
1033
1034         assert(m);
1035         assert(_f);
1036         assert(_fds);
1037
1038         r = manager_open_serialization(m, &f);
1039         if (r < 0) {
1040                 log_error("Failed to create serialization file: %s", strerror(-r));
1041                 goto fail;
1042         }
1043
1044         /* Make sure nothing is really destructed when we shut down */
1045         m->n_reloading ++;
1046         bus_manager_send_reloading(m, true);
1047
1048         fds = fdset_new();
1049         if (!fds) {
1050                 r = -ENOMEM;
1051                 log_error("Failed to allocate fd set: %s", strerror(-r));
1052                 goto fail;
1053         }
1054
1055         r = manager_serialize(m, f, fds, switching_root);
1056         if (r < 0) {
1057                 log_error("Failed to serialize state: %s", strerror(-r));
1058                 goto fail;
1059         }
1060
1061         if (fseeko(f, 0, SEEK_SET) < 0) {
1062                 log_error("Failed to rewind serialization fd: %m");
1063                 goto fail;
1064         }
1065
1066         r = fd_cloexec(fileno(f), false);
1067         if (r < 0) {
1068                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1069                 goto fail;
1070         }
1071
1072         r = fdset_cloexec(fds, false);
1073         if (r < 0) {
1074                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1075                 goto fail;
1076         }
1077
1078         *_f = f;
1079         *_fds = fds;
1080
1081         return 0;
1082
1083 fail:
1084         fdset_free(fds);
1085
1086         if (f)
1087                 fclose(f);
1088
1089         return r;
1090 }
1091
1092 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1093         struct rlimit nl;
1094         int r;
1095
1096         assert(saved_rlimit);
1097
1098         /* Save the original RLIMIT_NOFILE so that we can reset it
1099          * later when transitioning from the initrd to the main
1100          * systemd or suchlike. */
1101         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1102                 log_error("Reading RLIMIT_NOFILE failed: %m");
1103                 return -errno;
1104         }
1105
1106         /* Make sure forked processes get the default kernel setting */
1107         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1108                 struct rlimit *rl;
1109
1110                 rl = newdup(struct rlimit, saved_rlimit, 1);
1111                 if (!rl)
1112                         return log_oom();
1113
1114                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1115         }
1116
1117         /* Bump up the resource limit for ourselves substantially */
1118         nl.rlim_cur = nl.rlim_max = 64*1024;
1119         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1120         if (r < 0) {
1121                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1122                 return r;
1123         }
1124
1125         return 0;
1126 }
1127
/* Logs a warning unless /etc/mtab is a symlink whose target is
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        bool points_to_proc = false;
        char *target;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                /* It is a symlink; now look at where it leads */
                points_to_proc =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");

                free(target);
        }

        if (points_to_proc)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1147
/* Logs a warning if /usr looks like a separate file system that has
 * not been mounted yet. The check is dir_is_empty("/usr"): only a
 * positive result triggers the warning, zero and errors are silently
 * accepted. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1159
/* Checks for /proc/cgroups. If it cannot be accessed, a warning is
 * logged and boot-up is delayed by ten seconds before we carry on. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* Leave the message on screen for a while */
                sleep(10);
        }
}
1174
1175 static int initialize_join_controllers(void) {
1176         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1177          * + "net_prio". We'd like to add "cpuset" to the mix, but
1178          * "cpuset" does't really work for groups with no initialized
1179          * attributes. */
1180
1181         arg_join_controllers = new(char**, 3);
1182         if (!arg_join_controllers)
1183                 return -ENOMEM;
1184
1185         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1186         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1187         arg_join_controllers[2] = NULL;
1188
1189         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1190                 free_join_controllers();
1191                 return -ENOMEM;
1192         }
1193
1194         return 0;
1195 }
1196
1197 static int enforce_syscall_archs(Set *archs) {
1198 #ifdef HAVE_SECCOMP
1199         scmp_filter_ctx *seccomp;
1200         Iterator i;
1201         void *id;
1202         int r;
1203
1204         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1205         if (!seccomp)
1206                 return log_oom();
1207
1208         SET_FOREACH(id, arg_syscall_archs, i) {
1209                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1210                 if (r == -EEXIST)
1211                         continue;
1212                 if (r < 0) {
1213                         log_error("Failed to add architecture to seccomp: %s", strerror(-r));
1214                         goto finish;
1215                 }
1216         }
1217
1218         r = seccomp_load(seccomp);
1219         if (r < 0)
1220                 log_error("Failed to add install architecture seccomp: %s", strerror(-r));
1221
1222 finish:
1223         seccomp_release(seccomp);
1224         return r;
1225 #else
1226         return 0;
1227 #endif
1228 }
1229
1230 int main(int argc, char *argv[]) {
1231         Manager *m = NULL;
1232         int r, retval = EXIT_FAILURE;
1233         usec_t before_startup, after_startup;
1234         char timespan[FORMAT_TIMESPAN_MAX];
1235         FDSet *fds = NULL;
1236         bool reexecute = false;
1237         const char *shutdown_verb = NULL;
1238         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1239         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1240         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1241         dual_timestamp security_start_timestamp = { 0ULL, 0ULL };
1242         dual_timestamp security_finish_timestamp = { 0ULL, 0ULL };
1243         static char systemd[] = "systemd";
1244         bool skip_setup = false;
1245         unsigned j;
1246         bool loaded_policy = false;
1247         bool arm_reboot_watchdog = false;
1248         bool queue_default_job = false;
1249         char *switch_root_dir = NULL, *switch_root_init = NULL;
1250         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1251
1252 #ifdef HAVE_SYSV_COMPAT
1253         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1254                 /* This is compatibility support for SysV, where
1255                  * calling init as a user is identical to telinit. */
1256
1257                 errno = -ENOENT;
1258                 execv(SYSTEMCTL_BINARY_PATH, argv);
1259                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1260                 return 1;
1261         }
1262 #endif
1263
1264         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1265         dual_timestamp_get(&userspace_timestamp);
1266
1267         /* Determine if this is a reexecution or normal bootup. We do
1268          * the full command line parsing much later, so let's just
1269          * have a quick peek here. */
1270         if (strv_find(argv+1, "--deserialize"))
1271                 skip_setup = true;
1272
1273         /* If we have switched root, do all the special setup
1274          * things */
1275         if (strv_find(argv+1, "--switched-root"))
1276                 skip_setup = false;
1277
1278         /* If we get started via the /sbin/init symlink then we are
1279            called 'init'. After a subsequent reexecution we are then
1280            called 'systemd'. That is confusing, hence let's call us
1281            systemd right-away. */
1282         program_invocation_short_name = systemd;
1283         prctl(PR_SET_NAME, systemd);
1284
1285         saved_argv = argv;
1286         saved_argc = argc;
1287
1288         log_show_color(isatty(STDERR_FILENO) > 0);
1289
1290         /* Disable the umask logic */
1291         if (getpid() == 1)
1292                 umask(0);
1293
1294         if (getpid() == 1 && detect_container(NULL) <= 0) {
1295
1296                 /* Running outside of a container as PID 1 */
1297                 arg_running_as = SYSTEMD_SYSTEM;
1298                 make_null_stdio();
1299                 log_set_target(LOG_TARGET_KMSG);
1300                 log_open();
1301
1302                 if (in_initrd())
1303                         initrd_timestamp = userspace_timestamp;
1304
1305                 if (!skip_setup) {
1306                         mount_setup_early();
1307                         dual_timestamp_get(&security_start_timestamp);
1308                         if (selinux_setup(&loaded_policy) < 0)
1309                                 goto finish;
1310                         if (ima_setup() < 0)
1311                                 goto finish;
1312                         if (smack_setup() < 0)
1313                                 goto finish;
1314                         dual_timestamp_get(&security_finish_timestamp);
1315                 }
1316
1317                 if (label_init(NULL) < 0)
1318                         goto finish;
1319
1320                 if (!skip_setup) {
1321                         if (hwclock_is_localtime() > 0) {
1322                                 int min;
1323
1324                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1325                                 r = hwclock_set_timezone(&min);
1326                                 if (r < 0)
1327                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1328                                 else
1329                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1330                         } else if (!in_initrd()) {
1331                                 /*
1332                                  * Do dummy first-time call to seal the kernel's time warp magic
1333                                  *
1334                                  * Do not call this this from inside the initrd. The initrd might not
1335                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1336                                  * that way. In such case, we need to delay the time-warp or the sealing
1337                                  * until we reach the real system.
1338                                  */
1339                                 hwclock_reset_timezone();
1340
1341                                 /* Tell the kernel our timezone */
1342                                 r = hwclock_set_timezone(NULL);
1343                                 if (r < 0)
1344                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1345                         }
1346                 }
1347
1348                 /* Set the default for later on, but don't actually
1349                  * open the logs like this for now. Note that if we
1350                  * are transitioning from the initrd there might still
1351                  * be journal fd open, and we shouldn't attempt
1352                  * opening that before we parsed /proc/cmdline which
1353                  * might redirect output elsewhere. */
1354                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1355
1356         } else if (getpid() == 1) {
1357                 /* Running inside a container, as PID 1 */
1358                 arg_running_as = SYSTEMD_SYSTEM;
1359                 log_set_target(LOG_TARGET_CONSOLE);
1360                 log_close_console(); /* force reopen of /dev/console */
1361                 log_open();
1362
1363                 /* For the later on, see above... */
1364                 log_set_target(LOG_TARGET_JOURNAL);
1365
1366                 /* clear the kernel timestamp,
1367                  * because we are in a container */
1368                 kernel_timestamp.monotonic = 0ULL;
1369                 kernel_timestamp.realtime = 0ULL;
1370
1371         } else {
1372                 /* Running as user instance */
1373                 arg_running_as = SYSTEMD_USER;
1374                 log_set_target(LOG_TARGET_AUTO);
1375                 log_open();
1376
1377                 /* clear the kernel timestamp,
1378                  * because we are not PID 1 */
1379                 kernel_timestamp.monotonic = 0ULL;
1380                 kernel_timestamp.realtime = 0ULL;
1381         }
1382
1383         /* Initialize default unit */
1384         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1385         if (r < 0) {
1386                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1387                 goto finish;
1388         }
1389
1390         r = initialize_join_controllers();
1391         if (r < 0)
1392                 goto finish;
1393
1394         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1395          * /proc/$PID/fd is available. */
1396         if (getpid() == 1) {
1397                 r = mount_setup(loaded_policy);
1398                 if (r < 0)
1399                         goto finish;
1400         }
1401
1402         /* Reset all signal handlers. */
1403         assert_se(reset_all_signal_handlers() == 0);
1404
1405         ignore_signals(SIGNALS_IGNORE, -1);
1406
1407         if (parse_config_file() < 0)
1408                 goto finish;
1409
1410         if (arg_running_as == SYSTEMD_SYSTEM)
1411                 if (parse_proc_cmdline() < 0)
1412                         goto finish;
1413
1414         log_parse_environment();
1415
1416         if (parse_argv(argc, argv) < 0)
1417                 goto finish;
1418
1419         if (arg_action == ACTION_TEST &&
1420             geteuid() == 0) {
1421                 log_error("Don't run test mode as root.");
1422                 goto finish;
1423         }
1424
1425         if (arg_running_as == SYSTEMD_USER &&
1426             arg_action == ACTION_RUN &&
1427             sd_booted() <= 0) {
1428                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1429                 goto finish;
1430         }
1431
1432         if (arg_running_as == SYSTEMD_SYSTEM &&
1433             arg_action == ACTION_RUN &&
1434             running_in_chroot() > 0) {
1435                 log_error("Cannot be run in a chroot() environment.");
1436                 goto finish;
1437         }
1438
1439         if (arg_action == ACTION_HELP) {
1440                 retval = help();
1441                 goto finish;
1442         } else if (arg_action == ACTION_VERSION) {
1443                 retval = version();
1444                 goto finish;
1445         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1446                 unit_dump_config_items(stdout);
1447                 retval = EXIT_SUCCESS;
1448                 goto finish;
1449         } else if (arg_action == ACTION_DONE) {
1450                 retval = EXIT_SUCCESS;
1451                 goto finish;
1452         }
1453
1454         if (arg_running_as == SYSTEMD_USER &&
1455             !getenv("XDG_RUNTIME_DIR")) {
1456                 log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
1457                 goto finish;
1458         }
1459
1460         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1461
1462         /* Close logging fds, in order not to confuse fdset below */
1463         log_close();
1464
1465         /* Remember open file descriptors for later deserialization */
1466         r = fdset_new_fill(&fds);
1467         if (r < 0) {
1468                 log_error("Failed to allocate fd set: %s", strerror(-r));
1469                 goto finish;
1470         } else
1471                 fdset_cloexec(fds, true);
1472
1473         if (arg_serialization)
1474                 assert_se(fdset_remove(fds, fileno(arg_serialization)) >= 0);
1475
1476         if (arg_running_as == SYSTEMD_SYSTEM)
1477                 /* Become a session leader if we aren't one yet. */
1478                 setsid();
1479
1480         /* Move out of the way, so that we won't block unmounts */
1481         assert_se(chdir("/")  == 0);
1482
1483         /* Reset the console, but only if this is really init and we
1484          * are freshly booted */
1485         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1486                 console_setup(getpid() == 1 && !skip_setup);
1487
1488         /* Open the logging devices, if possible and necessary */
1489         log_open();
1490
1491         /* Make sure we leave a core dump without panicking the
1492          * kernel. */
1493         if (getpid() == 1) {
1494                 install_crash_handler();
1495
1496                 r = mount_cgroup_controllers(arg_join_controllers);
1497                 if (r < 0)
1498                         goto finish;
1499         }
1500
1501         if (arg_running_as == SYSTEMD_SYSTEM) {
1502                 const char *virtualization = NULL;
1503
1504                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1505
1506                 detect_virtualization(&virtualization);
1507                 if (virtualization)
1508                         log_info("Detected virtualization '%s'.", virtualization);
1509
1510                 if (in_initrd())
1511                         log_info("Running in initial RAM disk.");
1512
1513         } else {
1514                 _cleanup_free_ char *t = uid_to_name(getuid());
1515                 log_debug(PACKAGE_STRING " running in user mode for user "PID_FMT"/%s. (" SYSTEMD_FEATURES ")",
1516                           getuid(), t);
1517         }
1518
1519         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1520                 if (arg_show_status > 0 || plymouth_running())
1521                         status_welcome();
1522
1523 #ifdef HAVE_KMOD
1524                 if (detect_container(NULL) <= 0)
1525                         kmod_setup();
1526 #endif
1527                 hostname_setup();
1528                 machine_id_setup();
1529                 loopback_setup();
1530
1531                 test_mtab();
1532                 test_usr();
1533                 test_cgroups();
1534         }
1535
1536         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1537                 watchdog_set_timeout(&arg_runtime_watchdog);
1538
1539         if (arg_timer_slack_nsec != (nsec_t) -1)
1540                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1541                         log_error("Failed to adjust timer slack: %m");
1542
1543         if (arg_capability_bounding_set_drop) {
1544                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1545                 if (r < 0) {
1546                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1547                         goto finish;
1548                 }
1549                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1550                 if (r < 0) {
1551                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1552                         goto finish;
1553                 }
1554         }
1555
1556         if (arg_syscall_archs) {
1557                 r = enforce_syscall_archs(arg_syscall_archs);
1558                 if (r < 0)
1559                         goto finish;
1560         }
1561
1562         if (arg_running_as == SYSTEMD_USER) {
1563                 /* Become reaper of our children */
1564                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1565                         log_warning("Failed to make us a subreaper: %m");
1566                         if (errno == EINVAL)
1567                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1568                 }
1569         }
1570
1571         if (arg_running_as == SYSTEMD_SYSTEM)
1572                 bump_rlimit_nofile(&saved_rlimit_nofile);
1573
1574         r = manager_new(arg_running_as, &m);
1575         if (r < 0) {
1576                 log_error("Failed to allocate manager object: %s", strerror(-r));
1577                 goto finish;
1578         }
1579
1580         m->confirm_spawn = arg_confirm_spawn;
1581         m->default_std_output = arg_default_std_output;
1582         m->default_std_error = arg_default_std_error;
1583         m->default_restart_usec = arg_default_restart_usec;
1584         m->default_timeout_start_usec = arg_default_timeout_start_usec;
1585         m->default_timeout_stop_usec = arg_default_timeout_stop_usec;
1586         m->default_start_limit_interval = arg_default_start_limit_interval;
1587         m->default_start_limit_burst = arg_default_start_limit_burst;
1588         m->runtime_watchdog = arg_runtime_watchdog;
1589         m->shutdown_watchdog = arg_shutdown_watchdog;
1590         m->userspace_timestamp = userspace_timestamp;
1591         m->kernel_timestamp = kernel_timestamp;
1592         m->initrd_timestamp = initrd_timestamp;
1593         m->security_start_timestamp = security_start_timestamp;
1594         m->security_finish_timestamp = security_finish_timestamp;
1595
1596         manager_set_default_rlimits(m, arg_default_rlimit);
1597
1598         if (arg_default_environment)
1599                 manager_environment_add(m, NULL, arg_default_environment);
1600
1601         if (arg_show_status == SHOW_STATUS_UNSET)
1602                 arg_show_status = SHOW_STATUS_YES;
1603         manager_set_show_status(m, arg_show_status);
1604
1605         /* Remember whether we should queue the default job */
1606         queue_default_job = !arg_serialization || arg_switched_root;
1607
1608         before_startup = now(CLOCK_MONOTONIC);
1609
1610         r = manager_startup(m, arg_serialization, fds);
1611         if (r < 0)
1612                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1613
1614         /* This will close all file descriptors that were opened, but
1615          * not claimed by any unit. */
1616         fdset_free(fds);
1617         fds = NULL;
1618
1619         if (arg_serialization) {
1620                 fclose(arg_serialization);
1621                 arg_serialization = NULL;
1622         }
1623
1624         if (queue_default_job) {
1625                 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1626                 Unit *target = NULL;
1627                 Job *default_unit_job;
1628
1629                 log_debug("Activating default unit: %s", arg_default_unit);
1630
1631                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1632                 if (r < 0)
1633                         log_error("Failed to load default target: %s", bus_error_message(&error, r));
1634                 else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1635                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1636                 else if (target->load_state == UNIT_MASKED)
1637                         log_error("Default target masked.");
1638
1639                 if (!target || target->load_state != UNIT_LOADED) {
1640                         log_info("Trying to load rescue target...");
1641
1642                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1643                         if (r < 0) {
1644                                 log_error("Failed to load rescue target: %s", bus_error_message(&error, r));
1645                                 goto finish;
1646                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1647                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1648                                 goto finish;
1649                         } else if (target->load_state == UNIT_MASKED) {
1650                                 log_error("Rescue target masked.");
1651                                 goto finish;
1652                         }
1653                 }
1654
1655                 assert(target->load_state == UNIT_LOADED);
1656
1657                 if (arg_action == ACTION_TEST) {
1658                         printf("-> By units:\n");
1659                         manager_dump_units(m, stdout, "\t");
1660                 }
1661
1662                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1663                 if (r == -EPERM) {
1664                         log_debug("Default target could not be isolated, starting instead: %s", bus_error_message(&error, r));
1665
1666                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1667                         if (r < 0) {
1668                                 log_error("Failed to start default target: %s", bus_error_message(&error, r));
1669                                 goto finish;
1670                         }
1671                 } else if (r < 0) {
1672                         log_error("Failed to isolate default target: %s", bus_error_message(&error, r));
1673                         goto finish;
1674                 }
1675
1676                 m->default_unit_job_id = default_unit_job->id;
1677
1678                 after_startup = now(CLOCK_MONOTONIC);
1679                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1680                          "Loaded units and determined initial transaction in %s.",
1681                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1682
1683                 if (arg_action == ACTION_TEST) {
1684                         printf("-> By jobs:\n");
1685                         manager_dump_jobs(m, stdout, "\t");
1686                         retval = EXIT_SUCCESS;
1687                         goto finish;
1688                 }
1689         }
1690
1691         for (;;) {
1692                 r = manager_loop(m);
1693                 if (r < 0) {
1694                         log_error("Failed to run mainloop: %s", strerror(-r));
1695                         goto finish;
1696                 }
1697
1698                 switch (m->exit_code) {
1699
1700                 case MANAGER_EXIT:
1701                         retval = EXIT_SUCCESS;
1702                         log_debug("Exit.");
1703                         goto finish;
1704
1705                 case MANAGER_RELOAD:
1706                         log_info("Reloading.");
1707                         r = manager_reload(m);
1708                         if (r < 0)
1709                                 log_error("Failed to reload: %s", strerror(-r));
1710                         break;
1711
1712                 case MANAGER_REEXECUTE:
1713
1714                         if (prepare_reexecute(m, &arg_serialization, &fds, false) < 0)
1715                                 goto finish;
1716
1717                         reexecute = true;
1718                         log_notice("Reexecuting.");
1719                         goto finish;
1720
1721                 case MANAGER_SWITCH_ROOT:
1722                         /* Steal the switch root parameters */
1723                         switch_root_dir = m->switch_root;
1724                         switch_root_init = m->switch_root_init;
1725                         m->switch_root = m->switch_root_init = NULL;
1726
1727                         if (!switch_root_init)
1728                                 if (prepare_reexecute(m, &arg_serialization, &fds, true) < 0)
1729                                         goto finish;
1730
1731                         reexecute = true;
1732                         log_notice("Switching root.");
1733                         goto finish;
1734
1735                 case MANAGER_REBOOT:
1736                 case MANAGER_POWEROFF:
1737                 case MANAGER_HALT:
1738                 case MANAGER_KEXEC: {
1739                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1740                                 [MANAGER_REBOOT] = "reboot",
1741                                 [MANAGER_POWEROFF] = "poweroff",
1742                                 [MANAGER_HALT] = "halt",
1743                                 [MANAGER_KEXEC] = "kexec"
1744                         };
1745
1746                         assert_se(shutdown_verb = table[m->exit_code]);
1747                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1748
1749                         log_notice("Shutting down.");
1750                         goto finish;
1751                 }
1752
1753                 default:
1754                         assert_not_reached("Unknown exit code.");
1755                 }
1756         }
1757
1758 finish:
1759         if (m) {
1760                 manager_free(m);
1761                 m = NULL;
1762         }
1763
1764         for (j = 0; j < ELEMENTSOF(arg_default_rlimit); j++) {
1765                 free(arg_default_rlimit[j]);
1766                 arg_default_rlimit[j] = NULL;
1767         }
1768
1769         free(arg_default_unit);
1770         arg_default_unit = NULL;
1771
1772         free_join_controllers();
1773
1774         strv_free(arg_default_environment);
1775         arg_default_environment = NULL;
1776
1777         set_free(arg_syscall_archs);
1778         arg_syscall_archs = NULL;
1779
1780         label_finish();
1781
1782         if (reexecute) {
1783                 const char **args;
1784                 unsigned i, args_size;
1785
1786                 /* Close and disarm the watchdog, so that the new
1787                  * instance can reinitialize it, but doesn't get
1788                  * rebooted while we do that */
1789                 watchdog_close(true);
1790
1791                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1792                  * that the new systemd can pass the kernel default to
1793                  * its child processes */
1794                 if (saved_rlimit_nofile.rlim_cur > 0)
1795                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1796
1797                 if (switch_root_dir) {
1798                         /* Kill all remaining processes from the
1799                          * initrd, but don't wait for them, so that we
1800                          * can handle the SIGCHLD for them after
1801                          * deserializing. */
1802                         broadcast_signal(SIGTERM, false, true);
1803
1804                         /* And switch root */
1805                         r = switch_root(switch_root_dir);
1806                         if (r < 0)
1807                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1808                 }
1809
1810                 args_size = MAX(6, argc+1);
1811                 args = newa(const char*, args_size);
1812
1813                 if (!switch_root_init) {
1814                         char sfd[16];
1815
1816                         /* First try to spawn ourselves with the right
1817                          * path, and with full serialization. We do
1818                          * this only if the user didn't specify an
1819                          * explicit init to spawn. */
1820
1821                         assert(arg_serialization);
1822                         assert(fds);
1823
1824                         snprintf(sfd, sizeof(sfd), "%i", fileno(arg_serialization));
1825                         char_array_0(sfd);
1826
1827                         i = 0;
1828                         args[i++] = SYSTEMD_BINARY_PATH;
1829                         if (switch_root_dir)
1830                                 args[i++] = "--switched-root";
1831                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1832                         args[i++] = "--deserialize";
1833                         args[i++] = sfd;
1834                         args[i++] = NULL;
1835
1836                         /* do not pass along the environment we inherit from the kernel or initrd */
1837                         if (switch_root_dir)
1838                                 clearenv();
1839
1840                         assert(i <= args_size);
1841                         execv(args[0], (char* const*) args);
1842                 }
1843
1844                 /* Try the fallback, if there is any, without any
1845                  * serialization. We pass the original argv[] and
1846                  * envp[]. (Well, modulo the ordering changes due to
1847                  * getopt() in argv[], and some cleanups in envp[],
1848                  * but let's hope that doesn't matter.) */
1849
1850                 if (arg_serialization) {
1851                         fclose(arg_serialization);
1852                         arg_serialization = NULL;
1853                 }
1854
1855                 if (fds) {
1856                         fdset_free(fds);
1857                         fds = NULL;
1858                 }
1859
1860                 /* Reopen the console */
1861                 make_console_stdio();
1862
1863                 for (j = 1, i = 1; j < (unsigned) argc; j++)
1864                         args[i++] = argv[j];
1865                 args[i++] = NULL;
1866                 assert(i <= args_size);
1867
1868                 if (switch_root_init) {
1869                         args[0] = switch_root_init;
1870                         execv(args[0], (char* const*) args);
1871                         log_warning("Failed to execute configured init, trying fallback: %m");
1872                 }
1873
1874                 args[0] = "/sbin/init";
1875                 execv(args[0], (char* const*) args);
1876
1877                 if (errno == ENOENT) {
1878                         log_warning("No /sbin/init, trying fallback");
1879
1880                         args[0] = "/bin/sh";
1881                         args[1] = NULL;
1882                         execv(args[0], (char* const*) args);
1883                         log_error("Failed to execute /bin/sh, giving up: %m");
1884                 } else
1885                         log_warning("Failed to execute /sbin/init, giving up: %m");
1886         }
1887
1888         if (arg_serialization) {
1889                 fclose(arg_serialization);
1890                 arg_serialization = NULL;
1891         }
1892
1893         if (fds) {
1894                 fdset_free(fds);
1895                 fds = NULL;
1896         }
1897
1898 #ifdef HAVE_VALGRIND_VALGRIND_H
1899         /* If we are PID 1 and running under valgrind, then let's exit
1900          * here explicitly. valgrind will only generate nice output on
1901          * exit(), not on exec(), hence let's do the former not the
1902          * latter here. */
1903         if (getpid() == 1 && RUNNING_ON_VALGRIND)
1904                 return 0;
1905 #endif
1906
1907         if (shutdown_verb) {
1908                 const char * command_line[] = {
1909                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1910                         shutdown_verb,
1911                         NULL
1912                 };
1913                 _cleanup_strv_free_ char **env_block = NULL;
1914                 env_block = strv_copy(environ);
1915
1916                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1917                         char *e;
1918
1919                         /* If we reboot let's set the shutdown
1920                          * watchdog and tell the shutdown binary to
1921                          * repeatedly ping it */
1922                         watchdog_set_timeout(&arg_shutdown_watchdog);
1923                         watchdog_close(false);
1924
1925                         /* Tell the binary how often to ping, ignore failure */
1926                         if (asprintf(&e, "WATCHDOG_USEC="USEC_FMT, arg_shutdown_watchdog) > 0)
1927                                 strv_push(&env_block, e);
1928                 } else
1929                         watchdog_close(true);
1930
1931                 /* Avoid the creation of new processes forked by the
1932                  * kernel; at this point, we will not listen to the
1933                  * signals anyway */
1934                 if (detect_container(NULL) <= 0)
1935                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1936
1937                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1938                 log_error("Failed to execute shutdown binary, freezing: %m");
1939         }
1940
1941         if (getpid() == 1)
1942                 freeze();
1943
1944         return retval;
1945 }