chiark / gitweb /
main: drop capabilities of userhelpers before ours
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dbus/dbus.h>
23
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <sys/wait.h>
33 #include <fcntl.h>
34 #include <sys/prctl.h>
35 #include <sys/mount.h>
36
37 #include "manager.h"
38 #include "log.h"
39 #include "load-fragment.h"
40 #include "fdset.h"
41 #include "special.h"
42 #include "conf-parser.h"
43 #include "dbus-common.h"
44 #include "missing.h"
45 #include "label.h"
46 #include "build.h"
47 #include "strv.h"
48 #include "def.h"
49 #include "virt.h"
50 #include "watchdog.h"
51 #include "path-util.h"
52 #include "switch-root.h"
53 #include "capability.h"
54 #include "killall.h"
55 #include "env-util.h"
56 #include "hwclock.h"
57 #include "sd-daemon.h"
58 #include "sd-messages.h"
59
60 #include "mount-setup.h"
61 #include "loopback-setup.h"
62 #ifdef HAVE_KMOD
63 #include "kmod-setup.h"
64 #endif
65 #include "hostname-setup.h"
66 #include "machine-id-setup.h"
67 #include "selinux-setup.h"
68 #include "ima-setup.h"
69 #include "fileio.h"
70 #include "smack-setup.h"
71
72 static enum {
73         ACTION_RUN,
74         ACTION_HELP,
75         ACTION_VERSION,
76         ACTION_TEST,
77         ACTION_DUMP_CONFIGURATION_ITEMS,
78         ACTION_DONE
79 } arg_action = ACTION_RUN;
80
81 static char *arg_default_unit = NULL;
82 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID;
83
84 static bool arg_dump_core = true;
85 static bool arg_crash_shell = false;
86 static int arg_crash_chvt = -1;
87 static bool arg_confirm_spawn = false;
88 static bool arg_show_status = true;
89 static bool arg_switched_root = false;
90 static char ***arg_join_controllers = NULL;
91 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
92 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
93 static usec_t arg_runtime_watchdog = 0;
94 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
95 static char **arg_default_environment = NULL;
96 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
97 static uint64_t arg_capability_bounding_set_drop = 0;
98 static nsec_t arg_timer_slack_nsec = (nsec_t) -1;
99
100 static FILE* serialization = NULL;
101
/* Signal handler that intentionally does nothing. crash() installs it for
 * SIGCHLD before it forks the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
104
105 _noreturn_ static void crash(int sig) {
106
107         if (getpid() != 1)
108                 /* Pass this on immediately, if this is not PID 1 */
109                 raise(sig);
110         else if (!arg_dump_core)
111                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
112         else {
113                 struct sigaction sa = {
114                         .sa_handler = nop_handler,
115                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
116                 };
117                 pid_t pid;
118
119                 /* We want to wait for the core process, hence let's enable SIGCHLD */
120                 sigaction(SIGCHLD, &sa, NULL);
121
122                 pid = fork();
123                 if (pid < 0)
124                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
125
126                 else if (pid == 0) {
127                         struct rlimit rl = {};
128
129                         /* Enable default signal handler for core dump */
130                         zero(sa);
131                         sa.sa_handler = SIG_DFL;
132                         sigaction(sig, &sa, NULL);
133
134                         /* Don't limit the core dump size */
135                         rl.rlim_cur = RLIM_INFINITY;
136                         rl.rlim_max = RLIM_INFINITY;
137                         setrlimit(RLIMIT_CORE, &rl);
138
139                         /* Just to be sure... */
140                         chdir("/");
141
142                         /* Raise the signal again */
143                         raise(sig);
144
145                         assert_not_reached("We shouldn't be here...");
146                         _exit(1);
147
148                 } else {
149                         siginfo_t status;
150                         int r;
151
152                         /* Order things nicely. */
153                         r = wait_for_terminate(pid, &status);
154                         if (r < 0)
155                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
156                         else if (status.si_code != CLD_DUMPED)
157                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
158                         else
159                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
160                 }
161         }
162
163         if (arg_crash_chvt)
164                 chvt(arg_crash_chvt);
165
166         if (arg_crash_shell) {
167                 struct sigaction sa = {
168                         .sa_handler = SIG_IGN,
169                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
170                 };
171                 pid_t pid;
172
173                 log_info("Executing crash shell in 10s...");
174                 sleep(10);
175
176                 /* Let the kernel reap children for us */
177                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
178
179                 pid = fork();
180                 if (pid < 0)
181                         log_error("Failed to fork off crash shell: %m");
182                 else if (pid == 0) {
183                         make_console_stdio();
184                         execl("/bin/sh", "/bin/sh", NULL);
185
186                         log_error("execl() failed: %m");
187                         _exit(1);
188                 }
189
190                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
191         }
192
193         log_info("Freezing execution.");
194         freeze();
195 }
196
197 static void install_crash_handler(void) {
198         struct sigaction sa = {
199                 .sa_handler = crash,
200                 .sa_flags = SA_NODEFER,
201         };
202
203         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
204 }
205
206 static int console_setup(bool do_reset) {
207         int tty_fd, r;
208
209         /* If we are init, we connect stdin/stdout/stderr to /dev/null
210          * and make sure we don't have a controlling tty. */
211
212         release_terminal();
213
214         if (!do_reset)
215                 return 0;
216
217         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
218         if (tty_fd < 0) {
219                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
220                 return -tty_fd;
221         }
222
223         /* We don't want to force text mode.
224          * plymouth may be showing pictures already from initrd. */
225         r = reset_terminal_fd(tty_fd, false);
226         if (r < 0)
227                 log_error("Failed to reset /dev/console: %s", strerror(-r));
228
229         close_nointr_nofail(tty_fd);
230         return r;
231 }
232
233 static int set_default_unit(const char *u) {
234         char *c;
235
236         assert(u);
237
238         c = strdup(u);
239         if (!c)
240                 return -ENOMEM;
241
242         free(arg_default_unit);
243         arg_default_unit = c;
244
245         return 0;
246 }
247
248 static int parse_proc_cmdline_word(const char *word) {
249
250         static const char * const rlmap[] = {
251                 "emergency", SPECIAL_EMERGENCY_TARGET,
252                 "-b",        SPECIAL_EMERGENCY_TARGET,
253                 "single",    SPECIAL_RESCUE_TARGET,
254                 "-s",        SPECIAL_RESCUE_TARGET,
255                 "s",         SPECIAL_RESCUE_TARGET,
256                 "S",         SPECIAL_RESCUE_TARGET,
257                 "1",         SPECIAL_RESCUE_TARGET,
258                 "2",         SPECIAL_RUNLEVEL2_TARGET,
259                 "3",         SPECIAL_RUNLEVEL3_TARGET,
260                 "4",         SPECIAL_RUNLEVEL4_TARGET,
261                 "5",         SPECIAL_RUNLEVEL5_TARGET,
262         };
263
264         assert(word);
265
266         if (startswith(word, "systemd.unit=")) {
267
268                 if (!in_initrd())
269                         return set_default_unit(word + 13);
270
271         } else if (startswith(word, "rd.systemd.unit=")) {
272
273                 if (in_initrd())
274                         return set_default_unit(word + 16);
275
276         } else if (startswith(word, "systemd.log_target=")) {
277
278                 if (log_set_target_from_string(word + 19) < 0)
279                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
280
281         } else if (startswith(word, "systemd.log_level=")) {
282
283                 if (log_set_max_level_from_string(word + 18) < 0)
284                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
285
286         } else if (startswith(word, "systemd.log_color=")) {
287
288                 if (log_show_color_from_string(word + 18) < 0)
289                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
290
291         } else if (startswith(word, "systemd.log_location=")) {
292
293                 if (log_show_location_from_string(word + 21) < 0)
294                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
295
296         } else if (startswith(word, "systemd.dump_core=")) {
297                 int r;
298
299                 if ((r = parse_boolean(word + 18)) < 0)
300                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
301                 else
302                         arg_dump_core = r;
303
304         } else if (startswith(word, "systemd.crash_shell=")) {
305                 int r;
306
307                 if ((r = parse_boolean(word + 20)) < 0)
308                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
309                 else
310                         arg_crash_shell = r;
311
312         } else if (startswith(word, "systemd.confirm_spawn=")) {
313                 int r;
314
315                 if ((r = parse_boolean(word + 22)) < 0)
316                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
317                 else
318                         arg_confirm_spawn = r;
319
320         } else if (startswith(word, "systemd.crash_chvt=")) {
321                 int k;
322
323                 if (safe_atoi(word + 19, &k) < 0)
324                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
325                 else
326                         arg_crash_chvt = k;
327
328         } else if (startswith(word, "systemd.show_status=")) {
329                 int r;
330
331                 if ((r = parse_boolean(word + 20)) < 0)
332                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
333                 else
334                         arg_show_status = r;
335         } else if (startswith(word, "systemd.default_standard_output=")) {
336                 int r;
337
338                 if ((r = exec_output_from_string(word + 32)) < 0)
339                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
340                 else
341                         arg_default_std_output = r;
342         } else if (startswith(word, "systemd.default_standard_error=")) {
343                 int r;
344
345                 if ((r = exec_output_from_string(word + 31)) < 0)
346                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
347                 else
348                         arg_default_std_error = r;
349         } else if (startswith(word, "systemd.setenv=")) {
350                 _cleanup_free_ char *cenv = NULL;
351
352                 cenv = strdup(word + 15);
353                 if (!cenv)
354                         return -ENOMEM;
355
356                 if (env_assignment_is_valid(cenv)) {
357                         char **env;
358
359                         env = strv_env_set(arg_default_environment, cenv);
360                         if (env)
361                                 arg_default_environment = env;
362                         else
363                                 log_warning("Setting environment variable '%s' failed, ignoring: %m", cenv);
364                 } else
365                         log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
366
367         } else if (startswith(word, "systemd.") ||
368                    (in_initrd() && startswith(word, "rd.systemd."))) {
369
370                 const char *c;
371
372                 /* Ignore systemd.journald.xyz and friends */
373                 c = word;
374                 if (startswith(c, "rd."))
375                         c += 3;
376                 if (startswith(c, "systemd."))
377                         c += 8;
378                 if (c[strcspn(c, ".=")] != '.')  {
379
380                         log_warning("Unknown kernel switch %s. Ignoring.", word);
381
382                         log_info("Supported kernel switches:\n"
383                                  "systemd.unit=UNIT                        Default unit to start\n"
384                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
385                                  "systemd.dump_core=0|1                    Dump core on crash\n"
386                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
387                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
388                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
389                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
390                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
391                                  "                                         Log target\n"
392                                  "systemd.log_level=LEVEL                  Log level\n"
393                                  "systemd.log_color=0|1                    Highlight important log messages\n"
394                                  "systemd.log_location=0|1                 Include code location in log messages\n"
395                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
396                                  "                                         Set default log output for services\n"
397                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
398                                  "                                         Set default log error output for services\n"
399                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
400                 }
401
402         } else if (streq(word, "quiet"))
403                 arg_show_status = false;
404         else if (streq(word, "debug")) {
405                 /* Log to kmsg, the journal socket will fill up before the
406                  * journal is started and tools running during that time
407                  * will block with every log message for for 60 seconds,
408                  * before they give up. */
409                 log_set_max_level(LOG_DEBUG);
410                 log_set_target(LOG_TARGET_KMSG);
411         } else if (!in_initrd()) {
412                 unsigned i;
413
414                 /* SysV compatibility */
415                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
416                         if (streq(word, rlmap[i]))
417                                 return set_default_unit(rlmap[i+1]);
418         }
419
420         return 0;
421 }
422
/* Generates a configuration parser callback called `name` that passes the
 * right-hand side of the assignment to func(). If func() returns a negative
 * value this is logged as a syntax error, using `descr` to name the setting.
 * The generated callback always returns 0, so a bad value never aborts
 * parsing of the rest of the file. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color" )
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
453
454
455 static int config_parse_cpu_affinity2(const char *unit,
456                                       const char *filename,
457                                       unsigned line,
458                                       const char *section,
459                                       const char *lvalue,
460                                       int ltype,
461                                       const char *rvalue,
462                                       void *data,
463                                       void *userdata) {
464
465         char *w;
466         size_t l;
467         char *state;
468         cpu_set_t *c = NULL;
469         unsigned ncpus = 0;
470
471         assert(filename);
472         assert(lvalue);
473         assert(rvalue);
474
475         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
476                 char *t;
477                 int r;
478                 unsigned cpu;
479
480                 if (!(t = strndup(w, l)))
481                         return log_oom();
482
483                 r = safe_atou(t, &cpu);
484                 free(t);
485
486                 if (!c)
487                         if (!(c = cpu_set_malloc(&ncpus)))
488                                 return log_oom();
489
490                 if (r < 0 || cpu >= ncpus) {
491                         log_syntax(unit, LOG_ERR, filename, line, -r,
492                                    "Failed to parse CPU affinity '%s'", rvalue);
493                         CPU_FREE(c);
494                         return -EBADMSG;
495                 }
496
497                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
498         }
499
500         if (c) {
501                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
502                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
503
504                 CPU_FREE(c);
505         }
506
507         return 0;
508 }
509
/* Frees a NULL-terminated array of string vectors: each element is handed to
 * strv_free(), then the array itself is released. NULL is accepted and
 * ignored. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (!l)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
521
522 static void free_join_controllers(void) {
523         strv_free_free(arg_join_controllers);
524         arg_join_controllers = NULL;
525 }
526
527 static int config_parse_join_controllers(const char *unit,
528                                          const char *filename,
529                                          unsigned line,
530                                          const char *section,
531                                          const char *lvalue,
532                                          int ltype,
533                                          const char *rvalue,
534                                          void *data,
535                                          void *userdata) {
536
537         unsigned n = 0;
538         char *state, *w;
539         size_t length;
540
541         assert(filename);
542         assert(lvalue);
543         assert(rvalue);
544
545         free_join_controllers();
546
547         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
548                 char *s, **l;
549
550                 s = strndup(w, length);
551                 if (!s)
552                         return log_oom();
553
554                 l = strv_split(s, ",");
555                 free(s);
556
557                 strv_uniq(l);
558
559                 if (strv_length(l) <= 1) {
560                         strv_free(l);
561                         continue;
562                 }
563
564                 if (!arg_join_controllers) {
565                         arg_join_controllers = new(char**, 2);
566                         if (!arg_join_controllers) {
567                                 strv_free(l);
568                                 return log_oom();
569                         }
570
571                         arg_join_controllers[0] = l;
572                         arg_join_controllers[1] = NULL;
573
574                         n = 1;
575                 } else {
576                         char ***a;
577                         char ***t;
578
579                         t = new0(char**, n+2);
580                         if (!t) {
581                                 strv_free(l);
582                                 return log_oom();
583                         }
584
585                         n = 0;
586
587                         for (a = arg_join_controllers; *a; a++) {
588
589                                 if (strv_overlap(*a, l)) {
590                                         char **c;
591
592                                         c = strv_merge(*a, l);
593                                         if (!c) {
594                                                 strv_free(l);
595                                                 strv_free_free(t);
596                                                 return log_oom();
597                                         }
598
599                                         strv_free(l);
600                                         l = c;
601                                 } else {
602                                         char **c;
603
604                                         c = strv_copy(*a);
605                                         if (!c) {
606                                                 strv_free(l);
607                                                 strv_free_free(t);
608                                                 return log_oom();
609                                         }
610
611                                         t[n++] = c;
612                                 }
613                         }
614
615                         t[n++] = strv_uniq(l);
616
617                         strv_free_free(arg_join_controllers);
618                         arg_join_controllers = t;
619                 }
620         }
621
622         return 0;
623 }
624
625 static int parse_config_file(void) {
626
627         const ConfigTableItem items[] = {
628                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
629                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
630                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
631                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
632                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
633                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
634                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
635                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
636                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
637                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
638                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
639                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
640                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
641                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
642                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
643                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
644                 { "Manager", "DefaultEnvironment",    config_parse_environ,      0, &arg_default_environment },
645                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
646                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
647                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
648                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
649                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
650                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
651                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
652                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
653                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
654                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
655                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
656                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
657                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
658                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
659                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
660                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
661                 { NULL, NULL, NULL, 0, NULL }
662         };
663
664         _cleanup_fclose_ FILE *f;
665         const char *fn;
666         int r;
667
668         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
669         f = fopen(fn, "re");
670         if (!f) {
671                 if (errno == ENOENT)
672                         return 0;
673
674                 log_warning("Failed to open configuration file '%s': %m", fn);
675                 return 0;
676         }
677
678         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
679         if (r < 0)
680                 log_warning("Failed to parse configuration file: %s", strerror(-r));
681
682         return 0;
683 }
684
685 static int parse_proc_cmdline(void) {
686         _cleanup_free_ char *line = NULL;
687         char *w, *state;
688         int r;
689         size_t l;
690
691         /* Don't read /proc/cmdline if we are in a container, since
692          * that is only relevant for the host system */
693         if (detect_container(NULL) > 0)
694                 return 0;
695
696         r = read_one_line_file("/proc/cmdline", &line);
697         if (r < 0) {
698                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
699                 return 0;
700         }
701
702         FOREACH_WORD_QUOTED(w, l, line, state) {
703                 _cleanup_free_ char *word;
704
705                 word = strndup(w, l);
706                 if (!word)
707                         return log_oom();
708
709                 r = parse_proc_cmdline_word(word);
710                 if (r < 0) {
711                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
712                         return r;
713                 }
714         }
715
716         return 0;
717 }
718
719 static int parse_argv(int argc, char *argv[]) {
720
721         enum {
722                 ARG_LOG_LEVEL = 0x100,
723                 ARG_LOG_TARGET,
724                 ARG_LOG_COLOR,
725                 ARG_LOG_LOCATION,
726                 ARG_UNIT,
727                 ARG_SYSTEM,
728                 ARG_USER,
729                 ARG_TEST,
730                 ARG_VERSION,
731                 ARG_DUMP_CONFIGURATION_ITEMS,
732                 ARG_DUMP_CORE,
733                 ARG_CRASH_SHELL,
734                 ARG_CONFIRM_SPAWN,
735                 ARG_SHOW_STATUS,
736                 ARG_DESERIALIZE,
737                 ARG_SWITCHED_ROOT,
738                 ARG_INTROSPECT,
739                 ARG_DEFAULT_STD_OUTPUT,
740                 ARG_DEFAULT_STD_ERROR
741         };
742
743         static const struct option options[] = {
744                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
745                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
746                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
747                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
748                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
749                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
750                 { "user",                     no_argument,       NULL, ARG_USER                     },
751                 { "test",                     no_argument,       NULL, ARG_TEST                     },
752                 { "help",                     no_argument,       NULL, 'h'                          },
753                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
754                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
755                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
756                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
757                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
758                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
759                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
760                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
761                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
762                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
763                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
764                 { NULL,                       0,                 NULL, 0                            }
765         };
766
767         int c, r;
768
769         assert(argc >= 1);
770         assert(argv);
771
772         if (getpid() == 1)
773                 opterr = 0;
774
775         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
776
777                 switch (c) {
778
779                 case ARG_LOG_LEVEL:
780                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
781                                 log_error("Failed to parse log level %s.", optarg);
782                                 return r;
783                         }
784
785                         break;
786
787                 case ARG_LOG_TARGET:
788
789                         if ((r = log_set_target_from_string(optarg)) < 0) {
790                                 log_error("Failed to parse log target %s.", optarg);
791                                 return r;
792                         }
793
794                         break;
795
796                 case ARG_LOG_COLOR:
797
798                         if (optarg) {
799                                 if ((r = log_show_color_from_string(optarg)) < 0) {
800                                         log_error("Failed to parse log color setting %s.", optarg);
801                                         return r;
802                                 }
803                         } else
804                                 log_show_color(true);
805
806                         break;
807
808                 case ARG_LOG_LOCATION:
809
810                         if (optarg) {
811                                 if ((r = log_show_location_from_string(optarg)) < 0) {
812                                         log_error("Failed to parse log location setting %s.", optarg);
813                                         return r;
814                                 }
815                         } else
816                                 log_show_location(true);
817
818                         break;
819
820                 case ARG_DEFAULT_STD_OUTPUT:
821
822                         if ((r = exec_output_from_string(optarg)) < 0) {
823                                 log_error("Failed to parse default standard output setting %s.", optarg);
824                                 return r;
825                         } else
826                                 arg_default_std_output = r;
827                         break;
828
829                 case ARG_DEFAULT_STD_ERROR:
830
831                         if ((r = exec_output_from_string(optarg)) < 0) {
832                                 log_error("Failed to parse default standard error output setting %s.", optarg);
833                                 return r;
834                         } else
835                                 arg_default_std_error = r;
836                         break;
837
838                 case ARG_UNIT:
839
840                         if ((r = set_default_unit(optarg)) < 0) {
841                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
842                                 return r;
843                         }
844
845                         break;
846
847                 case ARG_SYSTEM:
848                         arg_running_as = SYSTEMD_SYSTEM;
849                         break;
850
851                 case ARG_USER:
852                         arg_running_as = SYSTEMD_USER;
853                         break;
854
855                 case ARG_TEST:
856                         arg_action = ACTION_TEST;
857                         break;
858
859                 case ARG_VERSION:
860                         arg_action = ACTION_VERSION;
861                         break;
862
863                 case ARG_DUMP_CONFIGURATION_ITEMS:
864                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
865                         break;
866
867                 case ARG_DUMP_CORE:
868                         r = optarg ? parse_boolean(optarg) : 1;
869                         if (r < 0) {
870                                 log_error("Failed to parse dump core boolean %s.", optarg);
871                                 return r;
872                         }
873                         arg_dump_core = r;
874                         break;
875
876                 case ARG_CRASH_SHELL:
877                         r = optarg ? parse_boolean(optarg) : 1;
878                         if (r < 0) {
879                                 log_error("Failed to parse crash shell boolean %s.", optarg);
880                                 return r;
881                         }
882                         arg_crash_shell = r;
883                         break;
884
885                 case ARG_CONFIRM_SPAWN:
886                         r = optarg ? parse_boolean(optarg) : 1;
887                         if (r < 0) {
888                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
889                                 return r;
890                         }
891                         arg_confirm_spawn = r;
892                         break;
893
894                 case ARG_SHOW_STATUS:
895                         r = optarg ? parse_boolean(optarg) : 1;
896                         if (r < 0) {
897                                 log_error("Failed to parse show status boolean %s.", optarg);
898                                 return r;
899                         }
900                         arg_show_status = r;
901                         break;
902
903                 case ARG_DESERIALIZE: {
904                         int fd;
905                         FILE *f;
906
907                         r = safe_atoi(optarg, &fd);
908                         if (r < 0 || fd < 0) {
909                                 log_error("Failed to parse deserialize option %s.", optarg);
910                                 return r < 0 ? r : -EINVAL;
911                         }
912
913                         fd_cloexec(fd, true);
914
915                         f = fdopen(fd, "r");
916                         if (!f) {
917                                 log_error("Failed to open serialization fd: %m");
918                                 return -errno;
919                         }
920
921                         if (serialization)
922                                 fclose(serialization);
923
924                         serialization = f;
925
926                         break;
927                 }
928
929                 case ARG_SWITCHED_ROOT:
930                         arg_switched_root = true;
931                         break;
932
933                 case ARG_INTROSPECT: {
934                         const char * const * i = NULL;
935
936                         for (i = bus_interface_table; *i; i += 2)
937                                 if (!optarg || streq(i[0], optarg)) {
938                                         fputs(DBUS_INTROSPECT_1_0_XML_DOCTYPE_DECL_NODE
939                                               "<node>\n", stdout);
940                                         fputs(i[1], stdout);
941                                         fputs("</node>\n", stdout);
942
943                                         if (optarg)
944                                                 break;
945                                 }
946
947                         if (!i[0] && optarg)
948                                 log_error("Unknown interface %s.", optarg);
949
950                         arg_action = ACTION_DONE;
951                         break;
952                 }
953
954                 case 'h':
955                         arg_action = ACTION_HELP;
956                         break;
957
958                 case 'D':
959                         log_set_max_level(LOG_DEBUG);
960                         break;
961
962                 case 'b':
963                 case 's':
964                 case 'z':
965                         /* Just to eat away the sysvinit kernel
966                          * cmdline args without getopt() error
967                          * messages that we'll parse in
968                          * parse_proc_cmdline_word() or ignore. */
969
970                 case '?':
971                 default:
972                         if (getpid() != 1) {
973                                 log_error("Unknown option code %c", c);
974                                 return -EINVAL;
975                         }
976
977                         break;
978                 }
979
980         if (optind < argc && getpid() != 1) {
981                 /* Hmm, when we aren't run as init system
982                  * let's complain about excess arguments */
983
984                 log_error("Excess arguments.");
985                 return -EINVAL;
986         }
987
988         if (detect_container(NULL) > 0) {
989                 char **a;
990
991                 /* All /proc/cmdline arguments the kernel didn't
992                  * understand it passed to us. We're not really
993                  * interested in that usually since /proc/cmdline is
994                  * more interesting and complete. With one exception:
995                  * if we are run in a container /proc/cmdline is not
996                  * relevant for the container, hence we rely on argv[]
997                  * instead. */
998
999                 for (a = argv; a < argv + argc; a++)
1000                         if ((r = parse_proc_cmdline_word(*a)) < 0) {
1001                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
1002                                 return r;
1003                         }
1004         }
1005
1006         return 0;
1007 }
1008
1009 static int help(void) {
1010
1011         printf("%s [OPTIONS...]\n\n"
1012                "Starts up and maintains the system or user services.\n\n"
1013                "  -h --help                      Show this help\n"
1014                "     --test                      Determine startup sequence, dump it and exit\n"
1015                "     --dump-configuration-items  Dump understood unit configuration items\n"
1016                "     --introspect[=INTERFACE]    Extract D-Bus interface data\n"
1017                "     --unit=UNIT                 Set default unit\n"
1018                "     --system                    Run a system instance, even if PID != 1\n"
1019                "     --user                      Run a user instance\n"
1020                "     --dump-core[=0|1]           Dump core on crash\n"
1021                "     --crash-shell[=0|1]         Run shell on crash\n"
1022                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1023                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1024                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1025                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1026                "     --log-color[=0|1]           Highlight important log messages\n"
1027                "     --log-location[=0|1]        Include code location in log messages\n"
1028                "     --default-standard-output=  Set default standard output for services\n"
1029                "     --default-standard-error=   Set default standard error output for services\n",
1030                program_invocation_short_name);
1031
1032         return 0;
1033 }
1034
1035 static int version(void) {
1036         puts(PACKAGE_STRING);
1037         puts(SYSTEMD_FEATURES);
1038
1039         return 0;
1040 }
1041
1042 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1043         FILE *f = NULL;
1044         FDSet *fds = NULL;
1045         int r;
1046
1047         assert(m);
1048         assert(_f);
1049         assert(_fds);
1050
1051         r = manager_open_serialization(m, &f);
1052         if (r < 0) {
1053                 log_error("Failed to create serialization file: %s", strerror(-r));
1054                 goto fail;
1055         }
1056
1057         /* Make sure nothing is really destructed when we shut down */
1058         m->n_reloading ++;
1059         bus_broadcast_reloading(m, true);
1060
1061         fds = fdset_new();
1062         if (!fds) {
1063                 r = -ENOMEM;
1064                 log_error("Failed to allocate fd set: %s", strerror(-r));
1065                 goto fail;
1066         }
1067
1068         r = manager_serialize(m, f, fds, switching_root);
1069         if (r < 0) {
1070                 log_error("Failed to serialize state: %s", strerror(-r));
1071                 goto fail;
1072         }
1073
1074         if (fseeko(f, 0, SEEK_SET) < 0) {
1075                 log_error("Failed to rewind serialization fd: %m");
1076                 goto fail;
1077         }
1078
1079         r = fd_cloexec(fileno(f), false);
1080         if (r < 0) {
1081                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1082                 goto fail;
1083         }
1084
1085         r = fdset_cloexec(fds, false);
1086         if (r < 0) {
1087                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1088                 goto fail;
1089         }
1090
1091         *_f = f;
1092         *_fds = fds;
1093
1094         return 0;
1095
1096 fail:
1097         fdset_free(fds);
1098
1099         if (f)
1100                 fclose(f);
1101
1102         return r;
1103 }
1104
1105 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1106         struct rlimit nl;
1107         int r;
1108
1109         assert(saved_rlimit);
1110
1111         /* Save the original RLIMIT_NOFILE so that we can reset it
1112          * later when transitioning from the initrd to the main
1113          * systemd or suchlike. */
1114         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1115                 log_error("Reading RLIMIT_NOFILE failed: %m");
1116                 return -errno;
1117         }
1118
1119         /* Make sure forked processes get the default kernel setting */
1120         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1121                 struct rlimit *rl;
1122
1123                 rl = newdup(struct rlimit, saved_rlimit, 1);
1124                 if (!rl)
1125                         return log_oom();
1126
1127                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1128         }
1129
1130         /* Bump up the resource limit for ourselves substantially */
1131         nl.rlim_cur = nl.rlim_max = 64*1024;
1132         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1133         if (r < 0) {
1134                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1135                 return r;
1136         }
1137
1138         return 0;
1139 }
1140
/* Logs a warning unless /etc/mtab is a symlink pointing to
 * /proc/self/mounts or /proc/mounts. */
static void test_mtab(void) {
        bool points_to_proc = false;
        char *target;

        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                if (streq(target, "/proc/self/mounts"))
                        points_to_proc = true;
                else if (streq(target, "/proc/mounts"))
                        points_to_proc = true;

                free(target);
        }

        /* Either not a symlink at all, or one leading elsewhere */
        if (!points_to_proc)
                log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                            "This is not supported anymore. "
                            "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1160
/* Logs a warning if /usr is found to be empty, which is taken as a
 * sign that /usr lives on a separate file system that has not been
 * mounted yet. Nothing happens if /usr has content or cannot be
 * checked. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1172
/* If /proc/cgroups is not accessible, logs a warning about missing
 * control group support and then stalls for ten seconds before
 * returning. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* Give the message above a chance to be noticed */
                sleep(10);
        }
}
1187
1188 static int initialize_join_controllers(void) {
1189         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1190          * + "net_prio". We'd like to add "cpuset" to the mix, but
1191          * "cpuset" does't really work for groups with no initialized
1192          * attributes. */
1193
1194         arg_join_controllers = new(char**, 3);
1195         if (!arg_join_controllers)
1196                 return -ENOMEM;
1197
1198         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1199         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1200         arg_join_controllers[2] = NULL;
1201
1202         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1203                 free_join_controllers();
1204                 return -ENOMEM;
1205         }
1206
1207         return 0;
1208 }
1209
1210 int main(int argc, char *argv[]) {
1211         Manager *m = NULL;
1212         int r, retval = EXIT_FAILURE;
1213         usec_t before_startup, after_startup;
1214         char timespan[FORMAT_TIMESPAN_MAX];
1215         FDSet *fds = NULL;
1216         bool reexecute = false;
1217         const char *shutdown_verb = NULL;
1218         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1219         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1220         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1221         static char systemd[] = "systemd";
1222         bool skip_setup = false;
1223         int j;
1224         bool loaded_policy = false;
1225         bool arm_reboot_watchdog = false;
1226         bool queue_default_job = false;
1227         char *switch_root_dir = NULL, *switch_root_init = NULL;
1228         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1229
1230 #ifdef HAVE_SYSV_COMPAT
1231         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1232                 /* This is compatibility support for SysV, where
1233                  * calling init as a user is identical to telinit. */
1234
1235                 errno = -ENOENT;
1236                 execv(SYSTEMCTL_BINARY_PATH, argv);
1237                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1238                 return 1;
1239         }
1240 #endif
1241
1242         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1243         dual_timestamp_get(&userspace_timestamp);
1244
1245         /* Determine if this is a reexecution or normal bootup. We do
1246          * the full command line parsing much later, so let's just
1247          * have a quick peek here. */
1248         if (strv_find(argv+1, "--deserialize"))
1249                 skip_setup = true;
1250
1251         /* If we have switched root, do all the special setup
1252          * things */
1253         if (strv_find(argv+1, "--switched-root"))
1254                 skip_setup = false;
1255
1256         /* If we get started via the /sbin/init symlink then we are
1257            called 'init'. After a subsequent reexecution we are then
1258            called 'systemd'. That is confusing, hence let's call us
1259            systemd right-away. */
1260         program_invocation_short_name = systemd;
1261         prctl(PR_SET_NAME, systemd);
1262
1263         saved_argv = argv;
1264         saved_argc = argc;
1265
1266         log_show_color(isatty(STDERR_FILENO) > 0);
1267
1268         if (getpid() == 1 && detect_container(NULL) <= 0) {
1269
1270                 /* Running outside of a container as PID 1 */
1271                 arg_running_as = SYSTEMD_SYSTEM;
1272                 make_null_stdio();
1273                 log_set_target(LOG_TARGET_KMSG);
1274                 log_open();
1275
1276                 if (in_initrd())
1277                         initrd_timestamp = userspace_timestamp;
1278
1279                 if (!skip_setup) {
1280                         mount_setup_early();
1281                         if (selinux_setup(&loaded_policy) < 0)
1282                                 goto finish;
1283                         if (ima_setup() < 0)
1284                                 goto finish;
1285                         if (smack_setup() < 0)
1286                                 goto finish;
1287                 }
1288
1289                 if (label_init(NULL) < 0)
1290                         goto finish;
1291
1292                 if (!skip_setup) {
1293                         if (hwclock_is_localtime() > 0) {
1294                                 int min;
1295
1296                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1297                                 r = hwclock_set_timezone(&min);
1298                                 if (r < 0)
1299                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1300                                 else
1301                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1302                         } else if (!in_initrd()) {
1303                                 /*
1304                                  * Do dummy first-time call to seal the kernel's time warp magic
1305                                  *
1306                                  * Do not call this this from inside the initrd. The initrd might not
1307                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1308                                  * that way. In such case, we need to delay the time-warp or the sealing
1309                                  * until we reach the real system.
1310                                  */
1311                                 hwclock_reset_timezone();
1312
1313                                 /* Tell the kernel our timezone */
1314                                 r = hwclock_set_timezone(NULL);
1315                                 if (r < 0)
1316                                         log_error("Failed to set the kernel's timezone, ignoring: %s", strerror(-r));
1317                         }
1318                 }
1319
1320                 /* Set the default for later on, but don't actually
1321                  * open the logs like this for now. Note that if we
1322                  * are transitioning from the initrd there might still
1323                  * be journal fd open, and we shouldn't attempt
1324                  * opening that before we parsed /proc/cmdline which
1325                  * might redirect output elsewhere. */
1326                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1327
1328         } else if (getpid() == 1) {
1329                 /* Running inside a container, as PID 1 */
1330                 arg_running_as = SYSTEMD_SYSTEM;
1331                 log_set_target(LOG_TARGET_CONSOLE);
1332                 log_open();
1333
1334                 /* For the later on, see above... */
1335                 log_set_target(LOG_TARGET_JOURNAL);
1336
1337                 /* clear the kernel timestamp,
1338                  * because we are in a container */
1339                 kernel_timestamp.monotonic = 0ULL;
1340                 kernel_timestamp.realtime = 0ULL;
1341
1342         } else {
1343                 /* Running as user instance */
1344                 arg_running_as = SYSTEMD_USER;
1345                 log_set_target(LOG_TARGET_AUTO);
1346                 log_open();
1347
1348                 /* clear the kernel timestamp,
1349                  * because we are not PID 1 */
1350                 kernel_timestamp.monotonic = 0ULL;
1351                 kernel_timestamp.realtime = 0ULL;
1352         }
1353
1354         /* Initialize default unit */
1355         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1356         if (r < 0) {
1357                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1358                 goto finish;
1359         }
1360
1361         r = initialize_join_controllers();
1362         if (r < 0)
1363                 goto finish;
1364
1365         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1366          * /proc/$PID/fd is available. */
1367         if (getpid() == 1) {
1368                 r = mount_setup(loaded_policy);
1369                 if (r < 0)
1370                         goto finish;
1371         }
1372
1373         /* Reset all signal handlers. */
1374         assert_se(reset_all_signal_handlers() == 0);
1375
1376         ignore_signals(SIGNALS_IGNORE, -1);
1377
1378         if (parse_config_file() < 0)
1379                 goto finish;
1380
1381         if (arg_running_as == SYSTEMD_SYSTEM)
1382                 if (parse_proc_cmdline() < 0)
1383                         goto finish;
1384
1385         log_parse_environment();
1386
1387         if (parse_argv(argc, argv) < 0)
1388                 goto finish;
1389
1390         if (arg_action == ACTION_TEST &&
1391             geteuid() == 0) {
1392                 log_error("Don't run test mode as root.");
1393                 goto finish;
1394         }
1395
1396         if (arg_running_as == SYSTEMD_USER &&
1397             arg_action == ACTION_RUN &&
1398             sd_booted() <= 0) {
1399                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1400                 goto finish;
1401         }
1402
1403         if (arg_running_as == SYSTEMD_SYSTEM &&
1404             arg_action == ACTION_RUN &&
1405             running_in_chroot() > 0) {
1406                 log_error("Cannot be run in a chroot() environment.");
1407                 goto finish;
1408         }
1409
1410         if (arg_action == ACTION_HELP) {
1411                 retval = help();
1412                 goto finish;
1413         } else if (arg_action == ACTION_VERSION) {
1414                 retval = version();
1415                 goto finish;
1416         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1417                 unit_dump_config_items(stdout);
1418                 retval = EXIT_SUCCESS;
1419                 goto finish;
1420         } else if (arg_action == ACTION_DONE) {
1421                 retval = EXIT_SUCCESS;
1422                 goto finish;
1423         }
1424
1425         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1426
1427         /* Close logging fds, in order not to confuse fdset below */
1428         log_close();
1429
1430         /* Remember open file descriptors for later deserialization */
1431         r = fdset_new_fill(&fds);
1432         if (r < 0) {
1433                 log_error("Failed to allocate fd set: %s", strerror(-r));
1434                 goto finish;
1435         } else
1436                 fdset_cloexec(fds, true);
1437
1438         if (serialization)
1439                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1440
1441         if (arg_running_as == SYSTEMD_SYSTEM) {
1442                 /* Become a session leader if we aren't one yet. */
1443                 setsid();
1444
1445                 /* Disable the umask logic */
1446                 umask(0);
1447         }
1448
1449         /* Move out of the way, so that we won't block unmounts */
1450         assert_se(chdir("/")  == 0);
1451
1452         /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */
1453         dbus_connection_set_change_sigpipe(FALSE);
1454
1455         /* Reset the console, but only if this is really init and we
1456          * are freshly booted */
1457         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1458                 console_setup(getpid() == 1 && !skip_setup);
1459
1460         /* Open the logging devices, if possible and necessary */
1461         log_open();
1462
1463         /* Make sure we leave a core dump without panicing the
1464          * kernel. */
1465         if (getpid() == 1) {
1466                 install_crash_handler();
1467
1468                 r = mount_cgroup_controllers(arg_join_controllers);
1469                 if (r < 0)
1470                         goto finish;
1471         }
1472
1473         if (arg_running_as == SYSTEMD_SYSTEM) {
1474                 const char *virtualization = NULL;
1475
1476                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1477
1478                 detect_virtualization(&virtualization);
1479                 if (virtualization)
1480                         log_info("Detected virtualization '%s'.", virtualization);
1481
1482                 if (in_initrd())
1483                         log_info("Running in initial RAM disk.");
1484
1485         } else
1486                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1487
1488         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1489                 if (arg_show_status || plymouth_running())
1490                         status_welcome();
1491
1492 #ifdef HAVE_KMOD
1493                 kmod_setup();
1494 #endif
1495                 hostname_setup();
1496                 machine_id_setup();
1497                 loopback_setup();
1498
1499                 test_mtab();
1500                 test_usr();
1501                 test_cgroups();
1502         }
1503
1504         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1505                 watchdog_set_timeout(&arg_runtime_watchdog);
1506
1507         if (arg_timer_slack_nsec != (nsec_t) -1)
1508                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1509                         log_error("Failed to adjust timer slack: %m");
1510
1511         if (arg_capability_bounding_set_drop) {
1512                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1513                 if (r < 0) {
1514                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1515                         goto finish;
1516                 }
1517                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1518                 if (r < 0) {
1519                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1520                         goto finish;
1521                 }
1522         }
1523
1524         if (arg_running_as == SYSTEMD_USER) {
1525                 /* Become reaper of our children */
1526                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1527                         log_warning("Failed to make us a subreaper: %m");
1528                         if (errno == EINVAL)
1529                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1530                 }
1531         }
1532
1533         if (arg_running_as == SYSTEMD_SYSTEM)
1534                 bump_rlimit_nofile(&saved_rlimit_nofile);
1535
1536         r = manager_new(arg_running_as, !!serialization, &m);
1537         if (r < 0) {
1538                 log_error("Failed to allocate manager object: %s", strerror(-r));
1539                 goto finish;
1540         }
1541
1542         m->confirm_spawn = arg_confirm_spawn;
1543         m->default_std_output = arg_default_std_output;
1544         m->default_std_error = arg_default_std_error;
1545         m->runtime_watchdog = arg_runtime_watchdog;
1546         m->shutdown_watchdog = arg_shutdown_watchdog;
1547         m->userspace_timestamp = userspace_timestamp;
1548         m->kernel_timestamp = kernel_timestamp;
1549         m->initrd_timestamp = initrd_timestamp;
1550
1551         manager_set_default_rlimits(m, arg_default_rlimit);
1552
1553         if (arg_default_environment)
1554                 manager_environment_add(m, arg_default_environment);
1555
1556         manager_set_show_status(m, arg_show_status);
1557
1558         /* Remember whether we should queue the default job */
1559         queue_default_job = !serialization || arg_switched_root;
1560
1561         before_startup = now(CLOCK_MONOTONIC);
1562
1563         r = manager_startup(m, serialization, fds);
1564         if (r < 0)
1565                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1566
1567         /* This will close all file descriptors that were opened, but
1568          * not claimed by any unit. */
1569         fdset_free(fds);
1570
1571         if (serialization) {
1572                 fclose(serialization);
1573                 serialization = NULL;
1574         }
1575
1576         if (queue_default_job) {
1577                 DBusError error;
1578                 Unit *target = NULL;
1579                 Job *default_unit_job;
1580
1581                 dbus_error_init(&error);
1582
1583                 log_debug("Activating default unit: %s", arg_default_unit);
1584
1585                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1586                 if (r < 0) {
1587                         log_error("Failed to load default target: %s", bus_error(&error, r));
1588                         dbus_error_free(&error);
1589                 } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND)
1590                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1591                 else if (target->load_state == UNIT_MASKED)
1592                         log_error("Default target masked.");
1593
1594                 if (!target || target->load_state != UNIT_LOADED) {
1595                         log_info("Trying to load rescue target...");
1596
1597                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1598                         if (r < 0) {
1599                                 log_error("Failed to load rescue target: %s", bus_error(&error, r));
1600                                 dbus_error_free(&error);
1601                                 goto finish;
1602                         } else if (target->load_state == UNIT_ERROR || target->load_state == UNIT_NOT_FOUND) {
1603                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1604                                 goto finish;
1605                         } else if (target->load_state == UNIT_MASKED) {
1606                                 log_error("Rescue target masked.");
1607                                 goto finish;
1608                         }
1609                 }
1610
1611                 assert(target->load_state == UNIT_LOADED);
1612
1613                 if (arg_action == ACTION_TEST) {
1614                         printf("-> By units:\n");
1615                         manager_dump_units(m, stdout, "\t");
1616                 }
1617
1618                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1619                 if (r == -EPERM) {
1620                         log_debug("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
1621                         dbus_error_free(&error);
1622
1623                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1624                         if (r < 0) {
1625                                 log_error("Failed to start default target: %s", bus_error(&error, r));
1626                                 dbus_error_free(&error);
1627                                 goto finish;
1628                         }
1629                 } else if (r < 0) {
1630                         log_error("Failed to isolate default target: %s", bus_error(&error, r));
1631                         dbus_error_free(&error);
1632                         goto finish;
1633                 }
1634
1635                 m->default_unit_job_id = default_unit_job->id;
1636
1637                 after_startup = now(CLOCK_MONOTONIC);
1638                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1639                          "Loaded units and determined initial transaction in %s.",
1640                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1641
1642                 if (arg_action == ACTION_TEST) {
1643                         printf("-> By jobs:\n");
1644                         manager_dump_jobs(m, stdout, "\t");
1645                         retval = EXIT_SUCCESS;
1646                         goto finish;
1647                 }
1648         }
1649
1650         for (;;) {
1651                 r = manager_loop(m);
1652                 if (r < 0) {
1653                         log_error("Failed to run mainloop: %s", strerror(-r));
1654                         goto finish;
1655                 }
1656
1657                 switch (m->exit_code) {
1658
1659                 case MANAGER_EXIT:
1660                         retval = EXIT_SUCCESS;
1661                         log_debug("Exit.");
1662                         goto finish;
1663
1664                 case MANAGER_RELOAD:
1665                         log_info("Reloading.");
1666                         r = manager_reload(m);
1667                         if (r < 0)
1668                                 log_error("Failed to reload: %s", strerror(-r));
1669                         break;
1670
1671                 case MANAGER_REEXECUTE:
1672
1673                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1674                                 goto finish;
1675
1676                         reexecute = true;
1677                         log_notice("Reexecuting.");
1678                         goto finish;
1679
1680                 case MANAGER_SWITCH_ROOT:
1681                         /* Steal the switch root parameters */
1682                         switch_root_dir = m->switch_root;
1683                         switch_root_init = m->switch_root_init;
1684                         m->switch_root = m->switch_root_init = NULL;
1685
1686                         if (!switch_root_init)
1687                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1688                                         goto finish;
1689
1690                         reexecute = true;
1691                         log_notice("Switching root.");
1692                         goto finish;
1693
1694                 case MANAGER_REBOOT:
1695                 case MANAGER_POWEROFF:
1696                 case MANAGER_HALT:
1697                 case MANAGER_KEXEC: {
1698                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1699                                 [MANAGER_REBOOT] = "reboot",
1700                                 [MANAGER_POWEROFF] = "poweroff",
1701                                 [MANAGER_HALT] = "halt",
1702                                 [MANAGER_KEXEC] = "kexec"
1703                         };
1704
1705                         assert_se(shutdown_verb = table[m->exit_code]);
1706                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1707
1708                         log_notice("Shutting down.");
1709                         goto finish;
1710                 }
1711
1712                 default:
1713                         assert_not_reached("Unknown exit code.");
1714                 }
1715         }
1716
1717 finish:
1718         if (m)
1719                 manager_free(m);
1720
1721         for (j = 0; j < RLIMIT_NLIMITS; j++)
1722                 free(arg_default_rlimit[j]);
1723
1724         free(arg_default_unit);
1725         free_join_controllers();
1726
1727         dbus_shutdown();
1728         label_finish();
1729
1730         if (reexecute) {
1731                 const char **args;
1732                 unsigned i, args_size;
1733
1734                 /* Close and disarm the watchdog, so that the new
1735                  * instance can reinitialize it, but doesn't get
1736                  * rebooted while we do that */
1737                 watchdog_close(true);
1738
1739                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1740                  * that the new systemd can pass the kernel default to
1741                  * its child processes */
1742                 if (saved_rlimit_nofile.rlim_cur > 0)
1743                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1744
1745                 if (switch_root_dir) {
1746                         /* Kill all remaining processes from the
1747                          * initrd, but don't wait for them, so that we
1748                          * can handle the SIGCHLD for them after
1749                          * deserializing. */
1750                         broadcast_signal(SIGTERM, false);
1751
1752                         /* And switch root */
1753                         r = switch_root(switch_root_dir);
1754                         if (r < 0)
1755                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1756                 }
1757
1758                 args_size = MAX(6, argc+1);
1759                 args = newa(const char*, args_size);
1760
1761                 if (!switch_root_init) {
1762                         char sfd[16];
1763
1764                         /* First try to spawn ourselves with the right
1765                          * path, and with full serialization. We do
1766                          * this only if the user didn't specify an
1767                          * explicit init to spawn. */
1768
1769                         assert(serialization);
1770                         assert(fds);
1771
1772                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1773                         char_array_0(sfd);
1774
1775                         i = 0;
1776                         args[i++] = SYSTEMD_BINARY_PATH;
1777                         if (switch_root_dir)
1778                                 args[i++] = "--switched-root";
1779                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1780                         args[i++] = "--deserialize";
1781                         args[i++] = sfd;
1782                         args[i++] = NULL;
1783
1784                         /* do not pass along the environment we inherit from the kernel or initrd */
1785                         if (switch_root_dir)
1786                                 clearenv();
1787
1788                         assert(i <= args_size);
1789                         execv(args[0], (char* const*) args);
1790                 }
1791
1792                 /* Try the fallback, if there is any, without any
1793                  * serialization. We pass the original argv[] and
1794                  * envp[]. (Well, modulo the ordering changes due to
1795                  * getopt() in argv[], and some cleanups in envp[],
1796                  * but let's hope that doesn't matter.) */
1797
1798                 if (serialization) {
1799                         fclose(serialization);
1800                         serialization = NULL;
1801                 }
1802
1803                 if (fds) {
1804                         fdset_free(fds);
1805                         fds = NULL;
1806                 }
1807
1808                 /* Reopen the console */
1809                 make_console_stdio();
1810
1811                 for (j = 1, i = 1; j < argc; j++)
1812                         args[i++] = argv[j];
1813                 args[i++] = NULL;
1814                 assert(i <= args_size);
1815
1816                 if (switch_root_init) {
1817                         args[0] = switch_root_init;
1818                         execv(args[0], (char* const*) args);
1819                         log_warning("Failed to execute configured init, trying fallback: %m");
1820                 }
1821
1822                 args[0] = "/sbin/init";
1823                 execv(args[0], (char* const*) args);
1824
1825                 if (errno == ENOENT) {
1826                         log_warning("No /sbin/init, trying fallback");
1827
1828                         args[0] = "/bin/sh";
1829                         args[1] = NULL;
1830                         execv(args[0], (char* const*) args);
1831                         log_error("Failed to execute /bin/sh, giving up: %m");
1832                 } else
1833                         log_warning("Failed to execute /sbin/init, giving up: %m");
1834         }
1835
1836         if (serialization)
1837                 fclose(serialization);
1838
1839         if (fds)
1840                 fdset_free(fds);
1841
1842         if (shutdown_verb) {
1843                 const char * command_line[] = {
1844                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1845                         shutdown_verb,
1846                         NULL
1847                 };
1848                 char **env_block;
1849
1850                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1851                         char e[32];
1852
1853                         /* If we reboot let's set the shutdown
1854                          * watchdog and tell the shutdown binary to
1855                          * repeatedly ping it */
1856                         watchdog_set_timeout(&arg_shutdown_watchdog);
1857                         watchdog_close(false);
1858
1859                         /* Tell the binary how often to ping */
1860                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1861                         char_array_0(e);
1862
1863                         env_block = strv_append(environ, e);
1864                 } else {
1865                         env_block = strv_copy(environ);
1866                         watchdog_close(true);
1867                 }
1868
1869                 /* Avoid the creation of new processes forked by the
1870                  * kernel; at this point, we will not listen to the
1871                  * signals anyway */
1872                 if (detect_container(NULL) <= 0)
1873                         cg_uninstall_release_agent(SYSTEMD_CGROUP_CONTROLLER);
1874
1875                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1876                 free(env_block);
1877                 log_error("Failed to execute shutdown binary, freezing: %m");
1878         }
1879
1880         if (getpid() == 1)
1881                 freeze();
1882
1883         return retval;
1884 }