chiark / gitweb /
libsystemd-bus/bus-kernel.h: set MEMFD_MIN_SIZE to 128k
[elogind.git] / src / core / main.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <dbus/dbus.h>
23
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <sys/wait.h>
33 #include <fcntl.h>
34 #include <sys/prctl.h>
35 #include <sys/mount.h>
36
37 #include "manager.h"
38 #include "log.h"
39 #include "load-fragment.h"
40 #include "fdset.h"
41 #include "special.h"
42 #include "conf-parser.h"
43 #include "dbus-common.h"
44 #include "missing.h"
45 #include "label.h"
46 #include "build.h"
47 #include "strv.h"
48 #include "def.h"
49 #include "virt.h"
50 #include "watchdog.h"
51 #include "path-util.h"
52 #include "switch-root.h"
53 #include "capability.h"
54 #include "killall.h"
55 #include "env-util.h"
56 #include "hwclock.h"
57 #include "sd-daemon.h"
58 #include "sd-messages.h"
59
60 #include "mount-setup.h"
61 #include "loopback-setup.h"
62 #ifdef HAVE_KMOD
63 #include "kmod-setup.h"
64 #endif
65 #include "hostname-setup.h"
66 #include "machine-id-setup.h"
67 #include "locale-setup.h"
68 #include "selinux-setup.h"
69 #include "ima-setup.h"
70 #include "fileio.h"
71 #include "smack-setup.h"
72
73 static enum { /* Action this invocation should perform; one value is held in arg_action */
74         ACTION_RUN, /* initial value of arg_action (see the initializer below) */
75         ACTION_HELP, /* NOTE(review): presumably selected by --help; the assignment is outside this view */
76         ACTION_VERSION, /* NOTE(review): presumably selected by --version; confirm in parse_argv() */
77         ACTION_TEST, /* NOTE(review): presumably selected by --test; confirm in parse_argv() */
78         ACTION_DUMP_CONFIGURATION_ITEMS, /* NOTE(review): presumably selected by --dump-configuration-items; confirm */
79         ACTION_DONE
80 } arg_action = ACTION_RUN;
81
82 static char *arg_default_unit = NULL; /* heap-allocated unit name; replaced (old value freed) by set_default_unit() */
83 static SystemdRunningAs arg_running_as = _SYSTEMD_RUNNING_AS_INVALID; /* parse_config_file() reads system.conf when this is SYSTEMD_SYSTEM, user.conf otherwise */
84
85 static bool arg_dump_core = true; /* crash() forks a child to dump core when set; DumpCore= / systemd.dump_core= */
86 static bool arg_crash_shell = false; /* crash() spawns /bin/sh when set; CrashShell= / systemd.crash_shell= */
87 static int arg_crash_chvt = -1; /* VT number handed to chvt() by crash(); CrashChVT= / systemd.crash_chvt= */
88 static bool arg_confirm_spawn = false; /* set from systemd.confirm_spawn= on the kernel command line */
89 static bool arg_show_status = true; /* ShowStatus= / systemd.show_status=; cleared by the "quiet" kernel command line word */
90 static bool arg_switched_root = false;
91 static char **arg_default_controllers = NULL; /* filled from DefaultControllers= in the configuration file */
92 static char ***arg_join_controllers = NULL; /* NULL-terminated array of string vectors; rebuilt by config_parse_join_controllers() */
93 static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL; /* DefaultStandardOutput= / systemd.default_standard_output= */
94 static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; /* DefaultStandardError= / systemd.default_standard_error= */
95 static usec_t arg_runtime_watchdog = 0; /* RuntimeWatchdogSec= */
96 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE; /* ShutdownWatchdogSec=; initial value is ten minutes */
97 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {}; /* DefaultLimit*= settings, indexed by RLIMIT_* constant */
98 static uint64_t arg_capability_bounding_set_drop = 0; /* CapabilityBoundingSet= */
99 static nsec_t arg_timer_slack_nsec = (nsec_t) -1; /* TimerSlackNSec=; NOTE(review): (nsec_t) -1 presumably means "not configured" - confirm */
100
101 static FILE* serialization = NULL;
102
/* Signal handler that intentionally does nothing. crash() installs it for
 * SIGCHLD so that it can wait for the core-dumping child. */
static void nop_handler(int sig) {
        (void) sig;
}
105
106 _noreturn_ static void crash(int sig) {
107
108         if (!arg_dump_core)
109                 log_error("Caught <%s>, not dumping core.", signal_to_string(sig));
110         else {
111                 struct sigaction sa = {
112                         .sa_handler = nop_handler,
113                         .sa_flags = SA_NOCLDSTOP|SA_RESTART,
114                 };
115                 pid_t pid;
116
117                 /* We want to wait for the core process, hence let's enable SIGCHLD */
118                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
119
120                 pid = fork();
121                 if (pid < 0)
122                         log_error("Caught <%s>, cannot fork for core dump: %s", signal_to_string(sig), strerror(errno));
123
124                 else if (pid == 0) {
125                         struct rlimit rl = {};
126
127                         /* Enable default signal handler for core dump */
128                         zero(sa);
129                         sa.sa_handler = SIG_DFL;
130                         assert_se(sigaction(sig, &sa, NULL) == 0);
131
132                         /* Don't limit the core dump size */
133                         rl.rlim_cur = RLIM_INFINITY;
134                         rl.rlim_max = RLIM_INFINITY;
135                         setrlimit(RLIMIT_CORE, &rl);
136
137                         /* Just to be sure... */
138                         assert_se(chdir("/") == 0);
139
140                         /* Raise the signal again */
141                         raise(sig);
142
143                         assert_not_reached("We shouldn't be here...");
144                         _exit(1);
145
146                 } else {
147                         siginfo_t status;
148                         int r;
149
150                         /* Order things nicely. */
151                         r = wait_for_terminate(pid, &status);
152                         if (r < 0)
153                                 log_error("Caught <%s>, waitpid() failed: %s", signal_to_string(sig), strerror(-r));
154                         else if (status.si_code != CLD_DUMPED)
155                                 log_error("Caught <%s>, core dump failed.", signal_to_string(sig));
156                         else
157                                 log_error("Caught <%s>, dumped core as pid %lu.", signal_to_string(sig), (unsigned long) pid);
158                 }
159         }
160
161         if (arg_crash_chvt)
162                 chvt(arg_crash_chvt);
163
164         if (arg_crash_shell) {
165                 struct sigaction sa = {
166                         .sa_handler = SIG_IGN,
167                         .sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT|SA_RESTART,
168                 };
169                 pid_t pid;
170
171                 log_info("Executing crash shell in 10s...");
172                 sleep(10);
173
174                 /* Let the kernel reap children for us */
175                 assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
176
177                 pid = fork();
178                 if (pid < 0)
179                         log_error("Failed to fork off crash shell: %m");
180                 else if (pid == 0) {
181                         make_console_stdio();
182                         execl("/bin/sh", "/bin/sh", NULL);
183
184                         log_error("execl() failed: %m");
185                         _exit(1);
186                 }
187
188                 log_info("Successfully spawned crash shell as pid %lu.", (unsigned long) pid);
189         }
190
191         log_info("Freezing execution.");
192         freeze();
193 }
194
195 static void install_crash_handler(void) {
196         struct sigaction sa = {
197                 .sa_handler = crash,
198                 .sa_flags = SA_NODEFER,
199         };
200
201         sigaction_many(&sa, SIGNALS_CRASH_HANDLER, -1);
202 }
203
204 static int console_setup(bool do_reset) {
205         int tty_fd, r;
206
207         /* If we are init, we connect stdin/stdout/stderr to /dev/null
208          * and make sure we don't have a controlling tty. */
209
210         release_terminal();
211
212         if (!do_reset)
213                 return 0;
214
215         tty_fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
216         if (tty_fd < 0) {
217                 log_error("Failed to open /dev/console: %s", strerror(-tty_fd));
218                 return -tty_fd;
219         }
220
221         /* We don't want to force text mode.
222          * plymouth may be showing pictures already from initrd. */
223         r = reset_terminal_fd(tty_fd, false);
224         if (r < 0)
225                 log_error("Failed to reset /dev/console: %s", strerror(-r));
226
227         close_nointr_nofail(tty_fd);
228         return r;
229 }
230
231 static int set_default_unit(const char *u) {
232         char *c;
233
234         assert(u);
235
236         c = strdup(u);
237         if (!c)
238                 return -ENOMEM;
239
240         free(arg_default_unit);
241         arg_default_unit = c;
242
243         return 0;
244 }
245
246 static int parse_proc_cmdline_word(const char *word) {
247
248         static const char * const rlmap[] = {
249                 "emergency", SPECIAL_EMERGENCY_TARGET,
250                 "-b",        SPECIAL_EMERGENCY_TARGET,
251                 "single",    SPECIAL_RESCUE_TARGET,
252                 "-s",        SPECIAL_RESCUE_TARGET,
253                 "s",         SPECIAL_RESCUE_TARGET,
254                 "S",         SPECIAL_RESCUE_TARGET,
255                 "1",         SPECIAL_RESCUE_TARGET,
256                 "2",         SPECIAL_RUNLEVEL2_TARGET,
257                 "3",         SPECIAL_RUNLEVEL3_TARGET,
258                 "4",         SPECIAL_RUNLEVEL4_TARGET,
259                 "5",         SPECIAL_RUNLEVEL5_TARGET,
260         };
261
262         assert(word);
263
264         if (startswith(word, "systemd.unit=")) {
265
266                 if (!in_initrd())
267                         return set_default_unit(word + 13);
268
269         } else if (startswith(word, "rd.systemd.unit=")) {
270
271                 if (in_initrd())
272                         return set_default_unit(word + 16);
273
274         } else if (startswith(word, "systemd.log_target=")) {
275
276                 if (log_set_target_from_string(word + 19) < 0)
277                         log_warning("Failed to parse log target %s. Ignoring.", word + 19);
278
279         } else if (startswith(word, "systemd.log_level=")) {
280
281                 if (log_set_max_level_from_string(word + 18) < 0)
282                         log_warning("Failed to parse log level %s. Ignoring.", word + 18);
283
284         } else if (startswith(word, "systemd.log_color=")) {
285
286                 if (log_show_color_from_string(word + 18) < 0)
287                         log_warning("Failed to parse log color setting %s. Ignoring.", word + 18);
288
289         } else if (startswith(word, "systemd.log_location=")) {
290
291                 if (log_show_location_from_string(word + 21) < 0)
292                         log_warning("Failed to parse log location setting %s. Ignoring.", word + 21);
293
294         } else if (startswith(word, "systemd.dump_core=")) {
295                 int r;
296
297                 if ((r = parse_boolean(word + 18)) < 0)
298                         log_warning("Failed to parse dump core switch %s. Ignoring.", word + 18);
299                 else
300                         arg_dump_core = r;
301
302         } else if (startswith(word, "systemd.crash_shell=")) {
303                 int r;
304
305                 if ((r = parse_boolean(word + 20)) < 0)
306                         log_warning("Failed to parse crash shell switch %s. Ignoring.", word + 20);
307                 else
308                         arg_crash_shell = r;
309
310         } else if (startswith(word, "systemd.confirm_spawn=")) {
311                 int r;
312
313                 if ((r = parse_boolean(word + 22)) < 0)
314                         log_warning("Failed to parse confirm spawn switch %s. Ignoring.", word + 22);
315                 else
316                         arg_confirm_spawn = r;
317
318         } else if (startswith(word, "systemd.crash_chvt=")) {
319                 int k;
320
321                 if (safe_atoi(word + 19, &k) < 0)
322                         log_warning("Failed to parse crash chvt switch %s. Ignoring.", word + 19);
323                 else
324                         arg_crash_chvt = k;
325
326         } else if (startswith(word, "systemd.show_status=")) {
327                 int r;
328
329                 if ((r = parse_boolean(word + 20)) < 0)
330                         log_warning("Failed to parse show status switch %s. Ignoring.", word + 20);
331                 else
332                         arg_show_status = r;
333         } else if (startswith(word, "systemd.default_standard_output=")) {
334                 int r;
335
336                 if ((r = exec_output_from_string(word + 32)) < 0)
337                         log_warning("Failed to parse default standard output switch %s. Ignoring.", word + 32);
338                 else
339                         arg_default_std_output = r;
340         } else if (startswith(word, "systemd.default_standard_error=")) {
341                 int r;
342
343                 if ((r = exec_output_from_string(word + 31)) < 0)
344                         log_warning("Failed to parse default standard error switch %s. Ignoring.", word + 31);
345                 else
346                         arg_default_std_error = r;
347         } else if (startswith(word, "systemd.setenv=")) {
348                 _cleanup_free_ char *cenv = NULL;
349                 char *eq;
350                 int r;
351
352                 cenv = strdup(word + 15);
353                 if (!cenv)
354                         return -ENOMEM;
355
356                 eq = strchr(cenv, '=');
357                 if (!eq) {
358                         if (!env_name_is_valid(cenv))
359                                 log_warning("Environment variable name '%s' is not valid. Ignoring.", cenv);
360                         else  {
361                                 r = unsetenv(cenv);
362                                 if (r < 0)
363                                         log_warning("Unsetting environment variable '%s' failed, ignoring: %m", cenv);
364                         }
365                 } else {
366                         if (!env_assignment_is_valid(cenv))
367                                 log_warning("Environment variable assignment '%s' is not valid. Ignoring.", cenv);
368                         else {
369                                 *eq = 0;
370                                 r = setenv(cenv, eq + 1, 1);
371                                 if (r < 0)
372                                         log_warning("Setting environment variable '%s=%s' failed, ignoring: %m", cenv, eq + 1);
373                         }
374                 }
375
376         } else if (startswith(word, "systemd.") ||
377                    (in_initrd() && startswith(word, "rd.systemd."))) {
378
379                 const char *c;
380
381                 /* Ignore systemd.journald.xyz and friends */
382                 c = word;
383                 if (startswith(c, "rd."))
384                         c += 3;
385                 if (startswith(c, "systemd."))
386                         c += 8;
387                 if (c[strcspn(c, ".=")] != '.')  {
388
389                         log_warning("Unknown kernel switch %s. Ignoring.", word);
390
391                         log_info("Supported kernel switches:\n"
392                                  "systemd.unit=UNIT                        Default unit to start\n"
393                                  "rd.systemd.unit=UNIT                     Default unit to start when run in initrd\n"
394                                  "systemd.dump_core=0|1                    Dump core on crash\n"
395                                  "systemd.crash_shell=0|1                  Run shell on crash\n"
396                                  "systemd.crash_chvt=N                     Change to VT #N on crash\n"
397                                  "systemd.confirm_spawn=0|1                Confirm every process spawn\n"
398                                  "systemd.show_status=0|1                  Show status updates on the console during bootup\n"
399                                  "systemd.log_target=console|kmsg|journal|journal-or-kmsg|syslog|syslog-or-kmsg|null\n"
400                                  "                                         Log target\n"
401                                  "systemd.log_level=LEVEL                  Log level\n"
402                                  "systemd.log_color=0|1                    Highlight important log messages\n"
403                                  "systemd.log_location=0|1                 Include code location in log messages\n"
404                                  "systemd.default_standard_output=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
405                                  "                                         Set default log output for services\n"
406                                  "systemd.default_standard_error=null|tty|syslog|syslog+console|kmsg|kmsg+console|journal|journal+console\n"
407                                  "                                         Set default log error output for services\n"
408                                  "systemd.setenv=ASSIGNMENT                Set an environment variable for all spawned processes\n");
409                 }
410
411         } else if (streq(word, "quiet"))
412                 arg_show_status = false;
413         else if (streq(word, "debug"))
414                 log_set_max_level(LOG_DEBUG);
415         else if (!in_initrd()) {
416                 unsigned i;
417
418                 /* SysV compatibility */
419                 for (i = 0; i < ELEMENTSOF(rlmap); i += 2)
420                         if (streq(word, rlmap[i]))
421                                 return set_default_unit(rlmap[i+1]);
422         }
423
424         return 0;
425 }
426
/* Generates a configuration-file callback called "name" that hands the
 * right-hand side of the assignment to func(). If func() returns a negative
 * errno-style value, the failure is logged with "descr" naming the setting.
 * The generated callback itself always returns 0, so a bad value never aborts
 * parsing of the file. */
#define DEFINE_SETTER(name, func, descr)                              \
        static int name(const char *unit,                             \
                        const char *filename,                         \
                        unsigned line,                                \
                        const char *section,                          \
                        const char *lvalue,                           \
                        int ltype,                                    \
                        const char *rvalue,                           \
                        void *data,                                   \
                        void *userdata) {                             \
                                                                      \
                int r;                                                \
                                                                      \
                assert(filename);                                     \
                assert(lvalue);                                       \
                assert(rvalue);                                       \
                                                                      \
                r = func(rvalue);                                     \
                if (r < 0)                                            \
                        log_syntax(unit, LOG_ERR, filename, line, -r, \
                                   "Invalid " descr " '%s': %s",      \
                                   rvalue, strerror(-r));             \
                                                                      \
                return 0;                                             \
        }

DEFINE_SETTER(config_parse_level2, log_set_max_level_from_string, "log level")
DEFINE_SETTER(config_parse_target, log_set_target_from_string, "target")
DEFINE_SETTER(config_parse_color, log_show_color_from_string, "color")
DEFINE_SETTER(config_parse_location, log_show_location_from_string, "location")
457
458
459 static int config_parse_cpu_affinity2(const char *unit,
460                                       const char *filename,
461                                       unsigned line,
462                                       const char *section,
463                                       const char *lvalue,
464                                       int ltype,
465                                       const char *rvalue,
466                                       void *data,
467                                       void *userdata) {
468
469         char *w;
470         size_t l;
471         char *state;
472         cpu_set_t *c = NULL;
473         unsigned ncpus = 0;
474
475         assert(filename);
476         assert(lvalue);
477         assert(rvalue);
478
479         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
480                 char *t;
481                 int r;
482                 unsigned cpu;
483
484                 if (!(t = strndup(w, l)))
485                         return log_oom();
486
487                 r = safe_atou(t, &cpu);
488                 free(t);
489
490                 if (!c)
491                         if (!(c = cpu_set_malloc(&ncpus)))
492                                 return log_oom();
493
494                 if (r < 0 || cpu >= ncpus) {
495                         log_syntax(unit, LOG_ERR, filename, line, -r,
496                                    "Failed to parse CPU affinity '%s'", rvalue);
497                         CPU_FREE(c);
498                         return -EBADMSG;
499                 }
500
501                 CPU_SET_S(cpu, CPU_ALLOC_SIZE(ncpus), c);
502         }
503
504         if (c) {
505                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(ncpus), c) < 0)
506                         log_warning_unit(unit, "Failed to set CPU affinity: %m");
507
508                 CPU_FREE(c);
509         }
510
511         return 0;
512 }
513
/* Releases a NULL-terminated array of string vectors: every element is passed
 * to strv_free(), then the array itself is freed. A NULL array is a no-op. */
static void strv_free_free(char ***l) {
        char ***cursor = l;

        if (!l)
                return;

        while (*cursor) {
                strv_free(*cursor);
                cursor++;
        }

        free(l);
}
525
526 static void free_join_controllers(void) {
527         strv_free_free(arg_join_controllers);
528         arg_join_controllers = NULL;
529 }
530
531 static int config_parse_join_controllers(const char *unit,
532                                          const char *filename,
533                                          unsigned line,
534                                          const char *section,
535                                          const char *lvalue,
536                                          int ltype,
537                                          const char *rvalue,
538                                          void *data,
539                                          void *userdata) {
540
541         unsigned n = 0;
542         char *state, *w;
543         size_t length;
544
545         assert(filename);
546         assert(lvalue);
547         assert(rvalue);
548
549         free_join_controllers();
550
551         FOREACH_WORD_QUOTED(w, length, rvalue, state) {
552                 char *s, **l;
553
554                 s = strndup(w, length);
555                 if (!s)
556                         return log_oom();
557
558                 l = strv_split(s, ",");
559                 free(s);
560
561                 strv_uniq(l);
562
563                 if (strv_length(l) <= 1) {
564                         strv_free(l);
565                         continue;
566                 }
567
568                 if (!arg_join_controllers) {
569                         arg_join_controllers = new(char**, 2);
570                         if (!arg_join_controllers) {
571                                 strv_free(l);
572                                 return log_oom();
573                         }
574
575                         arg_join_controllers[0] = l;
576                         arg_join_controllers[1] = NULL;
577
578                         n = 1;
579                 } else {
580                         char ***a;
581                         char ***t;
582
583                         t = new0(char**, n+2);
584                         if (!t) {
585                                 strv_free(l);
586                                 return log_oom();
587                         }
588
589                         n = 0;
590
591                         for (a = arg_join_controllers; *a; a++) {
592
593                                 if (strv_overlap(*a, l)) {
594                                         char **c;
595
596                                         c = strv_merge(*a, l);
597                                         if (!c) {
598                                                 strv_free(l);
599                                                 strv_free_free(t);
600                                                 return log_oom();
601                                         }
602
603                                         strv_free(l);
604                                         l = c;
605                                 } else {
606                                         char **c;
607
608                                         c = strv_copy(*a);
609                                         if (!c) {
610                                                 strv_free(l);
611                                                 strv_free_free(t);
612                                                 return log_oom();
613                                         }
614
615                                         t[n++] = c;
616                                 }
617                         }
618
619                         t[n++] = strv_uniq(l);
620
621                         strv_free_free(arg_join_controllers);
622                         arg_join_controllers = t;
623                 }
624         }
625
626         return 0;
627 }
628
629 static int parse_config_file(void) {
630
631         const ConfigTableItem items[] = {
632                 { "Manager", "LogLevel",              config_parse_level2,       0, NULL                     },
633                 { "Manager", "LogTarget",             config_parse_target,       0, NULL                     },
634                 { "Manager", "LogColor",              config_parse_color,        0, NULL                     },
635                 { "Manager", "LogLocation",           config_parse_location,     0, NULL                     },
636                 { "Manager", "DumpCore",              config_parse_bool,         0, &arg_dump_core           },
637                 { "Manager", "CrashShell",            config_parse_bool,         0, &arg_crash_shell         },
638                 { "Manager", "ShowStatus",            config_parse_bool,         0, &arg_show_status         },
639                 { "Manager", "CrashChVT",             config_parse_int,          0, &arg_crash_chvt          },
640                 { "Manager", "CPUAffinity",           config_parse_cpu_affinity2, 0, NULL                    },
641                 { "Manager", "DefaultControllers",    config_parse_strv,         0, &arg_default_controllers },
642                 { "Manager", "DefaultStandardOutput", config_parse_output,       0, &arg_default_std_output  },
643                 { "Manager", "DefaultStandardError",  config_parse_output,       0, &arg_default_std_error   },
644                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
645                 { "Manager", "RuntimeWatchdogSec",    config_parse_sec,          0, &arg_runtime_watchdog    },
646                 { "Manager", "ShutdownWatchdogSec",   config_parse_sec,          0, &arg_shutdown_watchdog   },
647                 { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
648                 { "Manager", "TimerSlackNSec",        config_parse_nsec,         0, &arg_timer_slack_nsec    },
649                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
650                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
651                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
652                 { "Manager", "DefaultLimitSTACK",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_STACK]},
653                 { "Manager", "DefaultLimitCORE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CORE]},
654                 { "Manager", "DefaultLimitRSS",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RSS]},
655                 { "Manager", "DefaultLimitNOFILE",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NOFILE]},
656                 { "Manager", "DefaultLimitAS",        config_parse_limit,        0, &arg_default_rlimit[RLIMIT_AS]},
657                 { "Manager", "DefaultLimitNPROC",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NPROC]},
658                 { "Manager", "DefaultLimitMEMLOCK",   config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MEMLOCK]},
659                 { "Manager", "DefaultLimitLOCKS",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_LOCKS]},
660                 { "Manager", "DefaultLimitSIGPENDING",config_parse_limit,        0, &arg_default_rlimit[RLIMIT_SIGPENDING]},
661                 { "Manager", "DefaultLimitMSGQUEUE",  config_parse_limit,        0, &arg_default_rlimit[RLIMIT_MSGQUEUE]},
662                 { "Manager", "DefaultLimitNICE",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_NICE]},
663                 { "Manager", "DefaultLimitRTPRIO",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTPRIO]},
664                 { "Manager", "DefaultLimitRTTIME",    config_parse_limit,        0, &arg_default_rlimit[RLIMIT_RTTIME]},
665                 { NULL, NULL, NULL, 0, NULL }
666         };
667
668         _cleanup_fclose_ FILE *f;
669         const char *fn;
670         int r;
671
672         fn = arg_running_as == SYSTEMD_SYSTEM ? PKGSYSCONFDIR "/system.conf" : PKGSYSCONFDIR "/user.conf";
673         f = fopen(fn, "re");
674         if (!f) {
675                 if (errno == ENOENT)
676                         return 0;
677
678                 log_warning("Failed to open configuration file '%s': %m", fn);
679                 return 0;
680         }
681
682         r = config_parse(NULL, fn, f, "Manager\0", config_item_table_lookup, (void*) items, false, false, NULL);
683         if (r < 0)
684                 log_warning("Failed to parse configuration file: %s", strerror(-r));
685
686         return 0;
687 }
688
689 static int parse_proc_cmdline(void) {
690         _cleanup_free_ char *line = NULL;
691         char *w, *state;
692         int r;
693         size_t l;
694
695         /* Don't read /proc/cmdline if we are in a container, since
696          * that is only relevant for the host system */
697         if (detect_container(NULL) > 0)
698                 return 0;
699
700         r = read_one_line_file("/proc/cmdline", &line);
701         if (r < 0) {
702                 log_warning("Failed to read /proc/cmdline, ignoring: %s", strerror(-r));
703                 return 0;
704         }
705
706         FOREACH_WORD_QUOTED(w, l, line, state) {
707                 _cleanup_free_ char *word;
708
709                 word = strndup(w, l);
710                 if (!word)
711                         return log_oom();
712
713                 r = parse_proc_cmdline_word(word);
714                 if (r < 0) {
715                         log_error("Failed on cmdline argument %s: %s", word, strerror(-r));
716                         return r;
717                 }
718         }
719
720         return 0;
721 }
722
723 static int parse_argv(int argc, char *argv[]) {
724
725         enum {
726                 ARG_LOG_LEVEL = 0x100,
727                 ARG_LOG_TARGET,
728                 ARG_LOG_COLOR,
729                 ARG_LOG_LOCATION,
730                 ARG_UNIT,
731                 ARG_SYSTEM,
732                 ARG_USER,
733                 ARG_TEST,
734                 ARG_VERSION,
735                 ARG_DUMP_CONFIGURATION_ITEMS,
736                 ARG_DUMP_CORE,
737                 ARG_CRASH_SHELL,
738                 ARG_CONFIRM_SPAWN,
739                 ARG_SHOW_STATUS,
740                 ARG_DESERIALIZE,
741                 ARG_SWITCHED_ROOT,
742                 ARG_INTROSPECT,
743                 ARG_DEFAULT_STD_OUTPUT,
744                 ARG_DEFAULT_STD_ERROR
745         };
746
747         static const struct option options[] = {
748                 { "log-level",                required_argument, NULL, ARG_LOG_LEVEL                },
749                 { "log-target",               required_argument, NULL, ARG_LOG_TARGET               },
750                 { "log-color",                optional_argument, NULL, ARG_LOG_COLOR                },
751                 { "log-location",             optional_argument, NULL, ARG_LOG_LOCATION             },
752                 { "unit",                     required_argument, NULL, ARG_UNIT                     },
753                 { "system",                   no_argument,       NULL, ARG_SYSTEM                   },
754                 { "user",                     no_argument,       NULL, ARG_USER                     },
755                 { "test",                     no_argument,       NULL, ARG_TEST                     },
756                 { "help",                     no_argument,       NULL, 'h'                          },
757                 { "version",                  no_argument,       NULL, ARG_VERSION                  },
758                 { "dump-configuration-items", no_argument,       NULL, ARG_DUMP_CONFIGURATION_ITEMS },
759                 { "dump-core",                optional_argument, NULL, ARG_DUMP_CORE                },
760                 { "crash-shell",              optional_argument, NULL, ARG_CRASH_SHELL              },
761                 { "confirm-spawn",            optional_argument, NULL, ARG_CONFIRM_SPAWN            },
762                 { "show-status",              optional_argument, NULL, ARG_SHOW_STATUS              },
763                 { "deserialize",              required_argument, NULL, ARG_DESERIALIZE              },
764                 { "switched-root",            no_argument,       NULL, ARG_SWITCHED_ROOT            },
765                 { "introspect",               optional_argument, NULL, ARG_INTROSPECT               },
766                 { "default-standard-output",  required_argument, NULL, ARG_DEFAULT_STD_OUTPUT,      },
767                 { "default-standard-error",   required_argument, NULL, ARG_DEFAULT_STD_ERROR,       },
768                 { NULL,                       0,                 NULL, 0                            }
769         };
770
771         int c, r;
772
773         assert(argc >= 1);
774         assert(argv);
775
776         if (getpid() == 1)
777                 opterr = 0;
778
779         while ((c = getopt_long(argc, argv, "hDbsz:", options, NULL)) >= 0)
780
781                 switch (c) {
782
783                 case ARG_LOG_LEVEL:
784                         if ((r = log_set_max_level_from_string(optarg)) < 0) {
785                                 log_error("Failed to parse log level %s.", optarg);
786                                 return r;
787                         }
788
789                         break;
790
791                 case ARG_LOG_TARGET:
792
793                         if ((r = log_set_target_from_string(optarg)) < 0) {
794                                 log_error("Failed to parse log target %s.", optarg);
795                                 return r;
796                         }
797
798                         break;
799
800                 case ARG_LOG_COLOR:
801
802                         if (optarg) {
803                                 if ((r = log_show_color_from_string(optarg)) < 0) {
804                                         log_error("Failed to parse log color setting %s.", optarg);
805                                         return r;
806                                 }
807                         } else
808                                 log_show_color(true);
809
810                         break;
811
812                 case ARG_LOG_LOCATION:
813
814                         if (optarg) {
815                                 if ((r = log_show_location_from_string(optarg)) < 0) {
816                                         log_error("Failed to parse log location setting %s.", optarg);
817                                         return r;
818                                 }
819                         } else
820                                 log_show_location(true);
821
822                         break;
823
824                 case ARG_DEFAULT_STD_OUTPUT:
825
826                         if ((r = exec_output_from_string(optarg)) < 0) {
827                                 log_error("Failed to parse default standard output setting %s.", optarg);
828                                 return r;
829                         } else
830                                 arg_default_std_output = r;
831                         break;
832
833                 case ARG_DEFAULT_STD_ERROR:
834
835                         if ((r = exec_output_from_string(optarg)) < 0) {
836                                 log_error("Failed to parse default standard error output setting %s.", optarg);
837                                 return r;
838                         } else
839                                 arg_default_std_error = r;
840                         break;
841
842                 case ARG_UNIT:
843
844                         if ((r = set_default_unit(optarg)) < 0) {
845                                 log_error("Failed to set default unit %s: %s", optarg, strerror(-r));
846                                 return r;
847                         }
848
849                         break;
850
851                 case ARG_SYSTEM:
852                         arg_running_as = SYSTEMD_SYSTEM;
853                         break;
854
855                 case ARG_USER:
856                         arg_running_as = SYSTEMD_USER;
857                         break;
858
859                 case ARG_TEST:
860                         arg_action = ACTION_TEST;
861                         break;
862
863                 case ARG_VERSION:
864                         arg_action = ACTION_VERSION;
865                         break;
866
867                 case ARG_DUMP_CONFIGURATION_ITEMS:
868                         arg_action = ACTION_DUMP_CONFIGURATION_ITEMS;
869                         break;
870
871                 case ARG_DUMP_CORE:
872                         r = optarg ? parse_boolean(optarg) : 1;
873                         if (r < 0) {
874                                 log_error("Failed to parse dump core boolean %s.", optarg);
875                                 return r;
876                         }
877                         arg_dump_core = r;
878                         break;
879
880                 case ARG_CRASH_SHELL:
881                         r = optarg ? parse_boolean(optarg) : 1;
882                         if (r < 0) {
883                                 log_error("Failed to parse crash shell boolean %s.", optarg);
884                                 return r;
885                         }
886                         arg_crash_shell = r;
887                         break;
888
889                 case ARG_CONFIRM_SPAWN:
890                         r = optarg ? parse_boolean(optarg) : 1;
891                         if (r < 0) {
892                                 log_error("Failed to parse confirm spawn boolean %s.", optarg);
893                                 return r;
894                         }
895                         arg_confirm_spawn = r;
896                         break;
897
898                 case ARG_SHOW_STATUS:
899                         r = optarg ? parse_boolean(optarg) : 1;
900                         if (r < 0) {
901                                 log_error("Failed to parse show status boolean %s.", optarg);
902                                 return r;
903                         }
904                         arg_show_status = r;
905                         break;
906
907                 case ARG_DESERIALIZE: {
908                         int fd;
909                         FILE *f;
910
911                         r = safe_atoi(optarg, &fd);
912                         if (r < 0 || fd < 0) {
913                                 log_error("Failed to parse deserialize option %s.", optarg);
914                                 return r < 0 ? r : -EINVAL;
915                         }
916
917                         fd_cloexec(fd, true);
918
919                         f = fdopen(fd, "r");
920                         if (!f) {
921                                 log_error("Failed to open serialization fd: %m");
922                                 return -errno;
923                         }
924
925                         if (serialization)
926                                 fclose(serialization);
927
928                         serialization = f;
929
930                         break;
931                 }
932
933                 case ARG_SWITCHED_ROOT:
934                         arg_switched_root = true;
935                         break;
936
937                 case ARG_INTROSPECT: {
938                         const char * const * i = NULL;
939
940                         for (i = bus_interface_table; *i; i += 2)
941                                 if (!optarg || streq(i[0], optarg)) {
942                                         fputs(DBUS_INTROSPECT_1_0_XML_DOCTYPE_DECL_NODE
943                                               "<node>\n", stdout);
944                                         fputs(i[1], stdout);
945                                         fputs("</node>\n", stdout);
946
947                                         if (optarg)
948                                                 break;
949                                 }
950
951                         if (!i[0] && optarg)
952                                 log_error("Unknown interface %s.", optarg);
953
954                         arg_action = ACTION_DONE;
955                         break;
956                 }
957
958                 case 'h':
959                         arg_action = ACTION_HELP;
960                         break;
961
962                 case 'D':
963                         log_set_max_level(LOG_DEBUG);
964                         break;
965
966                 case 'b':
967                 case 's':
968                 case 'z':
969                         /* Just to eat away the sysvinit kernel
970                          * cmdline args without getopt() error
971                          * messages that we'll parse in
972                          * parse_proc_cmdline_word() or ignore. */
973
974                 case '?':
975                 default:
976                         if (getpid() != 1) {
977                                 log_error("Unknown option code %c", c);
978                                 return -EINVAL;
979                         }
980
981                         break;
982                 }
983
984         if (optind < argc && getpid() != 1) {
985                 /* Hmm, when we aren't run as init system
986                  * let's complain about excess arguments */
987
988                 log_error("Excess arguments.");
989                 return -EINVAL;
990         }
991
992         if (detect_container(NULL) > 0) {
993                 char **a;
994
995                 /* All /proc/cmdline arguments the kernel didn't
996                  * understand it passed to us. We're not really
997                  * interested in that usually since /proc/cmdline is
998                  * more interesting and complete. With one exception:
999                  * if we are run in a container /proc/cmdline is not
1000                  * relevant for the container, hence we rely on argv[]
1001                  * instead. */
1002
1003                 for (a = argv; a < argv + argc; a++)
1004                         if ((r = parse_proc_cmdline_word(*a)) < 0) {
1005                                 log_error("Failed on cmdline argument %s: %s", *a, strerror(-r));
1006                                 return r;
1007                         }
1008         }
1009
1010         return 0;
1011 }
1012
1013 static int help(void) {
1014
1015         printf("%s [OPTIONS...]\n\n"
1016                "Starts up and maintains the system or user services.\n\n"
1017                "  -h --help                      Show this help\n"
1018                "     --test                      Determine startup sequence, dump it and exit\n"
1019                "     --dump-configuration-items  Dump understood unit configuration items\n"
1020                "     --introspect[=INTERFACE]    Extract D-Bus interface data\n"
1021                "     --unit=UNIT                 Set default unit\n"
1022                "     --system                    Run a system instance, even if PID != 1\n"
1023                "     --user                      Run a user instance\n"
1024                "     --dump-core[=0|1]           Dump core on crash\n"
1025                "     --crash-shell[=0|1]         Run shell on crash\n"
1026                "     --confirm-spawn[=0|1]       Ask for confirmation when spawning processes\n"
1027                "     --show-status[=0|1]         Show status updates on the console during bootup\n"
1028                "     --log-target=TARGET         Set log target (console, journal, syslog, kmsg, journal-or-kmsg, syslog-or-kmsg, null)\n"
1029                "     --log-level=LEVEL           Set log level (debug, info, notice, warning, err, crit, alert, emerg)\n"
1030                "     --log-color[=0|1]           Highlight important log messages\n"
1031                "     --log-location[=0|1]        Include code location in log messages\n"
1032                "     --default-standard-output=  Set default standard output for services\n"
1033                "     --default-standard-error=   Set default standard error output for services\n",
1034                program_invocation_short_name);
1035
1036         return 0;
1037 }
1038
1039 static int version(void) {
1040         puts(PACKAGE_STRING);
1041         puts(SYSTEMD_FEATURES);
1042
1043         return 0;
1044 }
1045
1046 static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
1047         FILE *f = NULL;
1048         FDSet *fds = NULL;
1049         int r;
1050
1051         assert(m);
1052         assert(_f);
1053         assert(_fds);
1054
1055         /* Make sure nothing is really destructed when we shut down */
1056         m->n_reloading ++;
1057
1058         r = manager_open_serialization(m, &f);
1059         if (r < 0) {
1060                 log_error("Failed to create serialization file: %s", strerror(-r));
1061                 goto fail;
1062         }
1063
1064         fds = fdset_new();
1065         if (!fds) {
1066                 r = -ENOMEM;
1067                 log_error("Failed to allocate fd set: %s", strerror(-r));
1068                 goto fail;
1069         }
1070
1071         r = manager_serialize(m, f, fds, switching_root);
1072         if (r < 0) {
1073                 log_error("Failed to serialize state: %s", strerror(-r));
1074                 goto fail;
1075         }
1076
1077         if (fseeko(f, 0, SEEK_SET) < 0) {
1078                 log_error("Failed to rewind serialization fd: %m");
1079                 goto fail;
1080         }
1081
1082         r = fd_cloexec(fileno(f), false);
1083         if (r < 0) {
1084                 log_error("Failed to disable O_CLOEXEC for serialization: %s", strerror(-r));
1085                 goto fail;
1086         }
1087
1088         r = fdset_cloexec(fds, false);
1089         if (r < 0) {
1090                 log_error("Failed to disable O_CLOEXEC for serialization fds: %s", strerror(-r));
1091                 goto fail;
1092         }
1093
1094         *_f = f;
1095         *_fds = fds;
1096
1097         return 0;
1098
1099 fail:
1100         fdset_free(fds);
1101
1102         if (f)
1103                 fclose(f);
1104
1105         return r;
1106 }
1107
1108 static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
1109         struct rlimit nl;
1110         int r;
1111
1112         assert(saved_rlimit);
1113
1114         /* Save the original RLIMIT_NOFILE so that we can reset it
1115          * later when transitioning from the initrd to the main
1116          * systemd or suchlike. */
1117         if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) {
1118                 log_error("Reading RLIMIT_NOFILE failed: %m");
1119                 return -errno;
1120         }
1121
1122         /* Make sure forked processes get the default kernel setting */
1123         if (!arg_default_rlimit[RLIMIT_NOFILE]) {
1124                 struct rlimit *rl;
1125
1126                 rl = newdup(struct rlimit, saved_rlimit, 1);
1127                 if (!rl)
1128                         return log_oom();
1129
1130                 arg_default_rlimit[RLIMIT_NOFILE] = rl;
1131         }
1132
1133         /* Bump up the resource limit for ourselves substantially */
1134         nl.rlim_cur = nl.rlim_max = 64*1024;
1135         r = setrlimit_closest(RLIMIT_NOFILE, &nl);
1136         if (r < 0) {
1137                 log_error("Setting RLIMIT_NOFILE failed: %s", strerror(-r));
1138                 return r;
1139         }
1140
1141         return 0;
1142 }
1143
1144 static struct dual_timestamp* parse_initrd_timestamp(struct dual_timestamp *t) {
1145         const char *e;
1146         unsigned long long a, b;
1147
1148         assert(t);
1149
1150         e = getenv("RD_TIMESTAMP");
1151         if (!e)
1152                 return NULL;
1153
1154         if (sscanf(e, "%llu %llu", &a, &b) != 2)
1155                 return NULL;
1156
1157         t->realtime = (usec_t) a;
1158         t->monotonic = (usec_t) b;
1159
1160         return t;
1161 }
1162
/* Logs a warning unless /etc/mtab is a symlink whose target is
 * exactly "/proc/self/mounts" or "/proc/mounts". */
static void test_mtab(void) {
        char *target = NULL;
        bool acceptable = false;

        /* If the link cannot be read at all this is treated the same
         * as a link with the wrong target. */
        if (readlink_malloc("/etc/mtab", &target) >= 0) {
                acceptable =
                        streq(target, "/proc/self/mounts") ||
                        streq(target, "/proc/mounts");

                free(target);
        }

        if (acceptable)
                return;

        log_warning("/etc/mtab is not a symlink or not pointing to /proc/self/mounts. "
                    "This is not supported anymore. "
                    "Please make sure to replace this file by a symlink to avoid incorrect or misleading mount(8) output.");
}
1182
/* Logs a warning when dir_is_empty("/usr") returns a positive value,
 * which is taken as the sign that /usr lives on a separate file
 * system that has not been mounted yet. A zero or negative result
 * (including errors) keeps us silent. */
static void test_usr(void) {

        /* Check that /usr is not a separate fs */

        if (dir_is_empty("/usr") <= 0)
                return;

        log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                    "Some things will probably break (sometimes even silently) in mysterious ways. "
                    "Consult http://freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
}
1194
/* Checks for /proc/cgroups. If it is missing, a warning about the
 * lack of CONFIG_CGROUPS is logged and start-up is held back for ten
 * seconds before continuing. */
static void test_cgroups(void) {

        if (access("/proc/cgroups", F_OK) < 0) {
                log_warning("CONFIG_CGROUPS was not set when your kernel was compiled. "
                            "Systems without control groups are not supported. "
                            "We will now sleep for 10s, and then continue boot-up. "
                            "Expect breakage and please do not file bugs. "
                            "Instead fix your kernel and enable CONFIG_CGROUPS. "
                            "Consult http://0pointer.de/blog/projects/cgroups-vs-cgroups.html for more information.");

                /* Deliberate pause, as announced in the message above */
                sleep(10);
        }
}
1209
1210 static int initialize_join_controllers(void) {
1211         /* By default, mount "cpu" + "cpuacct" together, and "net_cls"
1212          * + "net_prio". We'd like to add "cpuset" to the mix, but
1213          * "cpuset" does't really work for groups with no initialized
1214          * attributes. */
1215
1216         arg_join_controllers = new(char**, 3);
1217         if (!arg_join_controllers)
1218                 return -ENOMEM;
1219
1220         arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL);
1221         arg_join_controllers[1] = strv_new("net_cls", "net_prio", NULL);
1222         arg_join_controllers[2] = NULL;
1223
1224         if (!arg_join_controllers[0] || !arg_join_controllers[1]) {
1225                 free_join_controllers();
1226                 return -ENOMEM;
1227         }
1228
1229         return 0;
1230 }
1231
1232 int main(int argc, char *argv[]) {
1233         Manager *m = NULL;
1234         int r, retval = EXIT_FAILURE;
1235         usec_t before_startup, after_startup;
1236         char timespan[FORMAT_TIMESPAN_MAX];
1237         FDSet *fds = NULL;
1238         bool reexecute = false;
1239         const char *shutdown_verb = NULL;
1240         dual_timestamp initrd_timestamp = { 0ULL, 0ULL };
1241         dual_timestamp userspace_timestamp = { 0ULL, 0ULL };
1242         dual_timestamp kernel_timestamp = { 0ULL, 0ULL };
1243         static char systemd[] = "systemd";
1244         bool skip_setup = false;
1245         int j;
1246         bool loaded_policy = false;
1247         bool arm_reboot_watchdog = false;
1248         bool queue_default_job = false;
1249         char *switch_root_dir = NULL, *switch_root_init = NULL;
1250         static struct rlimit saved_rlimit_nofile = { 0, 0 };
1251
1252 #ifdef HAVE_SYSV_COMPAT
1253         if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
1254                 /* This is compatibility support for SysV, where
1255                  * calling init as a user is identical to telinit. */
1256
1257                 errno = -ENOENT;
1258                 execv(SYSTEMCTL_BINARY_PATH, argv);
1259                 log_error("Failed to exec " SYSTEMCTL_BINARY_PATH ": %m");
1260                 return 1;
1261         }
1262 #endif
1263
1264         dual_timestamp_from_monotonic(&kernel_timestamp, 0);
1265         dual_timestamp_get(&userspace_timestamp);
1266
1267         /* Determine if this is a reexecution or normal bootup. We do
1268          * the full command line parsing much later, so let's just
1269          * have a quick peek here. */
1270         if (strv_find(argv+1, "--deserialize"))
1271                 skip_setup = true;
1272
1273         /* If we have switched root, do all the special setup
1274          * things */
1275         if (strv_find(argv+1, "--switched-root"))
1276                 skip_setup = false;
1277
1278         /* If we get started via the /sbin/init symlink then we are
1279            called 'init'. After a subsequent reexecution we are then
1280            called 'systemd'. That is confusing, hence let's call us
1281            systemd right-away. */
1282         program_invocation_short_name = systemd;
1283         prctl(PR_SET_NAME, systemd);
1284
1285         saved_argv = argv;
1286         saved_argc = argc;
1287
1288         log_show_color(isatty(STDERR_FILENO) > 0);
1289
1290         if (getpid() == 1 && detect_container(NULL) <= 0) {
1291
1292                 /* Running outside of a container as PID 1 */
1293                 arg_running_as = SYSTEMD_SYSTEM;
1294                 make_null_stdio();
1295                 log_set_target(LOG_TARGET_KMSG);
1296                 log_open();
1297
1298                 if (in_initrd()) {
1299                         char *rd_timestamp = NULL;
1300
1301                         initrd_timestamp = userspace_timestamp;
1302                         asprintf(&rd_timestamp, "%llu %llu",
1303                                  (unsigned long long) initrd_timestamp.realtime,
1304                                  (unsigned long long) initrd_timestamp.monotonic);
1305                         if (rd_timestamp) {
1306                                 setenv("RD_TIMESTAMP", rd_timestamp, 1);
1307                                 free(rd_timestamp);
1308                         }
1309                 }
1310
1311                 if (!skip_setup) {
1312                         mount_setup_early();
1313                         if (selinux_setup(&loaded_policy) < 0)
1314                                 goto finish;
1315                         if (ima_setup() < 0)
1316                                 goto finish;
1317                         if (smack_setup() < 0)
1318                                 goto finish;
1319                 }
1320
1321                 if (label_init(NULL) < 0)
1322                         goto finish;
1323
1324                 if (!skip_setup) {
1325                         if (hwclock_is_localtime() > 0) {
1326                                 int min;
1327
1328                                 /* The first-time call to settimeofday() does a time warp in the kernel */
1329                                 r = hwclock_set_timezone(&min);
1330                                 if (r < 0)
1331                                         log_error("Failed to apply local time delta, ignoring: %s", strerror(-r));
1332                                 else
1333                                         log_info("RTC configured in localtime, applying delta of %i minutes to system time.", min);
1334                         } else if (!in_initrd()) {
1335                                 /*
1336                                  * Do dummy first-time call to seal the kernel's time warp magic
1337                                  *
1338                                  * Do not call this this from inside the initrd. The initrd might not
1339                                  * carry /etc/adjtime with LOCAL, but the real system could be set up
1340                                  * that way. In such case, we need to delay the time-warp or the sealing
1341                                  * until we reach the real system.
1342                                  */
1343                                 hwclock_reset_timezone();
1344
1345                                 /* Tell the kernel our time zone */
1346                                 r = hwclock_set_timezone(NULL);
1347                                 if (r < 0)
1348                                         log_error("Failed to set the kernel's time zone, ignoring: %s", strerror(-r));
1349                         }
1350                 }
1351
1352                 /* Set the default for later on, but don't actually
1353                  * open the logs like this for now. Note that if we
1354                  * are transitioning from the initrd there might still
1355                  * be journal fd open, and we shouldn't attempt
1356                  * opening that before we parsed /proc/cmdline which
1357                  * might redirect output elsewhere. */
1358                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1359
1360         } else if (getpid() == 1) {
1361                 /* Running inside a container, as PID 1 */
1362                 arg_running_as = SYSTEMD_SYSTEM;
1363                 log_set_target(LOG_TARGET_CONSOLE);
1364                 log_open();
1365
1366                 /* For the later on, see above... */
1367                 log_set_target(LOG_TARGET_JOURNAL);
1368
1369                 /* clear the kernel timestamp,
1370                  * because we are in a container */
1371                 kernel_timestamp.monotonic = 0ULL;
1372                 kernel_timestamp.realtime = 0ULL;
1373
1374         } else {
1375                 /* Running as user instance */
1376                 arg_running_as = SYSTEMD_USER;
1377                 log_set_target(LOG_TARGET_AUTO);
1378                 log_open();
1379
1380                 /* clear the kernel timestamp,
1381                  * because we are not PID 1 */
1382                 kernel_timestamp.monotonic = 0ULL;
1383                 kernel_timestamp.realtime = 0ULL;
1384         }
1385
1386         /* Initialize default unit */
1387         r = set_default_unit(SPECIAL_DEFAULT_TARGET);
1388         if (r < 0) {
1389                 log_error("Failed to set default unit %s: %s", SPECIAL_DEFAULT_TARGET, strerror(-r));
1390                 goto finish;
1391         }
1392
1393         r = initialize_join_controllers();
1394         if (r < 0)
1395                 goto finish;
1396
1397         /* Mount /proc, /sys and friends, so that /proc/cmdline and
1398          * /proc/$PID/fd is available. */
1399         if (getpid() == 1) {
1400                 r = mount_setup(loaded_policy);
1401                 if (r < 0)
1402                         goto finish;
1403         }
1404
1405         /* Reset all signal handlers. */
1406         assert_se(reset_all_signal_handlers() == 0);
1407
1408         /* If we are init, we can block sigkill. Yay. */
1409         ignore_signals(SIGNALS_IGNORE, -1);
1410
1411         if (parse_config_file() < 0)
1412                 goto finish;
1413
1414         if (arg_running_as == SYSTEMD_SYSTEM)
1415                 if (parse_proc_cmdline() < 0)
1416                         goto finish;
1417
1418         log_parse_environment();
1419
1420         if (parse_argv(argc, argv) < 0)
1421                 goto finish;
1422
1423         if (arg_action == ACTION_TEST &&
1424             geteuid() == 0) {
1425                 log_error("Don't run test mode as root.");
1426                 goto finish;
1427         }
1428
1429         if (arg_running_as == SYSTEMD_USER &&
1430             arg_action == ACTION_RUN &&
1431             sd_booted() <= 0) {
1432                 log_error("Trying to run as user instance, but the system has not been booted with systemd.");
1433                 goto finish;
1434         }
1435
1436         if (arg_running_as == SYSTEMD_SYSTEM &&
1437             arg_action == ACTION_RUN &&
1438             running_in_chroot() > 0) {
1439                 log_error("Cannot be run in a chroot() environment.");
1440                 goto finish;
1441         }
1442
1443         if (arg_action == ACTION_HELP) {
1444                 retval = help();
1445                 goto finish;
1446         } else if (arg_action == ACTION_VERSION) {
1447                 retval = version();
1448                 goto finish;
1449         } else if (arg_action == ACTION_DUMP_CONFIGURATION_ITEMS) {
1450                 unit_dump_config_items(stdout);
1451                 retval = EXIT_SUCCESS;
1452                 goto finish;
1453         } else if (arg_action == ACTION_DONE) {
1454                 retval = EXIT_SUCCESS;
1455                 goto finish;
1456         }
1457
1458         assert_se(arg_action == ACTION_RUN || arg_action == ACTION_TEST);
1459
1460         /* Close logging fds, in order not to confuse fdset below */
1461         log_close();
1462
1463         /* Remember open file descriptors for later deserialization */
1464         r = fdset_new_fill(&fds);
1465         if (r < 0) {
1466                 log_error("Failed to allocate fd set: %s", strerror(-r));
1467                 goto finish;
1468         } else
1469                 fdset_cloexec(fds, true);
1470
1471         if (serialization)
1472                 assert_se(fdset_remove(fds, fileno(serialization)) >= 0);
1473
1474         /* Set up PATH unless it is already set */
1475         setenv("PATH",
1476 #ifdef HAVE_SPLIT_USR
1477                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1478 #else
1479                "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin",
1480 #endif
1481                arg_running_as == SYSTEMD_SYSTEM);
1482
1483         if (arg_running_as == SYSTEMD_SYSTEM) {
1484                 /* Parse the data passed to us. We leave these
1485                  * variables set, but the manager later on will not
1486                  * pass them on to our children. */
1487                 if (!in_initrd())
1488                         parse_initrd_timestamp(&initrd_timestamp);
1489
1490                 /* Unset some environment variables passed in from the
1491                  * kernel that don't really make sense for us. */
1492                 unsetenv("HOME");
1493                 unsetenv("TERM");
1494
1495                 /* When we are invoked by a shell, these might be set,
1496                  * but make little sense to pass on */
1497                 unsetenv("PWD");
1498                 unsetenv("SHLVL");
1499                 unsetenv("_");
1500
1501                 /* When we are invoked by a chroot-like tool such as
1502                  * nspawn, these might be set, but make little sense
1503                  * to pass on */
1504                 unsetenv("USER");
1505                 unsetenv("LOGNAME");
1506
1507                 /* We suppress the socket activation env vars, as
1508                  * we'll try to match *any* open fd to units if
1509                  * possible. */
1510                 unsetenv("LISTEN_FDS");
1511                 unsetenv("LISTEN_PID");
1512
1513                 /* All other variables are left as is, so that clients
1514                  * can still read them via /proc/1/environ */
1515         }
1516
1517         /* Move out of the way, so that we won't block unmounts */
1518         assert_se(chdir("/")  == 0);
1519
1520         if (arg_running_as == SYSTEMD_SYSTEM) {
1521                 /* Become a session leader if we aren't one yet. */
1522                 setsid();
1523
1524                 /* Disable the umask logic */
1525                 umask(0);
1526         }
1527
1528         /* Make sure D-Bus doesn't fiddle with the SIGPIPE handlers */
1529         dbus_connection_set_change_sigpipe(FALSE);
1530
1531         /* Reset the console, but only if this is really init and we
1532          * are freshly booted */
1533         if (arg_running_as == SYSTEMD_SYSTEM && arg_action == ACTION_RUN)
1534                 console_setup(getpid() == 1 && !skip_setup);
1535
1536         /* Open the logging devices, if possible and necessary */
1537         log_open();
1538
1539         /* Make sure we leave a core dump without panicking the
1540          * kernel. */
1541         if (getpid() == 1) {
1542                 install_crash_handler();
1543
1544                 r = mount_cgroup_controllers(arg_join_controllers);
1545                 if (r < 0)
1546                         goto finish;
1547         }
1548
1549         if (arg_running_as == SYSTEMD_SYSTEM) {
1550                 const char *virtualization = NULL;
1551
1552                 log_info(PACKAGE_STRING " running in system mode. (" SYSTEMD_FEATURES ")");
1553
1554                 detect_virtualization(&virtualization);
1555                 if (virtualization)
1556                         log_info("Detected virtualization '%s'.", virtualization);
1557
1558                 if (in_initrd())
1559                         log_info("Running in initial RAM disk.");
1560
1561         } else
1562                 log_debug(PACKAGE_STRING " running in user mode. (" SYSTEMD_FEATURES ")");
1563
1564         if (arg_running_as == SYSTEMD_SYSTEM && !skip_setup) {
1565                 locale_setup();
1566
1567                 if (arg_show_status || plymouth_running())
1568                         status_welcome();
1569
1570 #ifdef HAVE_KMOD
1571                 kmod_setup();
1572 #endif
1573                 hostname_setup();
1574                 machine_id_setup();
1575                 loopback_setup();
1576
1577                 test_mtab();
1578                 test_usr();
1579                 test_cgroups();
1580         }
1581
1582         if (arg_running_as == SYSTEMD_SYSTEM && arg_runtime_watchdog > 0)
1583                 watchdog_set_timeout(&arg_runtime_watchdog);
1584
1585         if (arg_timer_slack_nsec != (nsec_t) -1)
1586                 if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0)
1587                         log_error("Failed to adjust timer slack: %m");
1588
1589         if (arg_capability_bounding_set_drop) {
1590                 r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
1591                 if (r < 0) {
1592                         log_error("Failed to drop capability bounding set: %s", strerror(-r));
1593                         goto finish;
1594                 }
1595                 r = capability_bounding_set_drop_usermode(arg_capability_bounding_set_drop);
1596                 if (r < 0) {
1597                         log_error("Failed to drop capability bounding set of usermode helpers: %s", strerror(-r));
1598                         goto finish;
1599                 }
1600         }
1601
1602         if (arg_running_as == SYSTEMD_USER) {
1603                 /* Become reaper of our children */
1604                 if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
1605                         log_warning("Failed to make us a subreaper: %m");
1606                         if (errno == EINVAL)
1607                                 log_info("Perhaps the kernel version is too old (< 3.4?)");
1608                 }
1609         }
1610
1611         if (arg_running_as == SYSTEMD_SYSTEM)
1612                 bump_rlimit_nofile(&saved_rlimit_nofile);
1613
1614         r = manager_new(arg_running_as, &m);
1615         if (r < 0) {
1616                 log_error("Failed to allocate manager object: %s", strerror(-r));
1617                 goto finish;
1618         }
1619
1620         m->confirm_spawn = arg_confirm_spawn;
1621         m->default_std_output = arg_default_std_output;
1622         m->default_std_error = arg_default_std_error;
1623         m->runtime_watchdog = arg_runtime_watchdog;
1624         m->shutdown_watchdog = arg_shutdown_watchdog;
1625         m->userspace_timestamp = userspace_timestamp;
1626         m->kernel_timestamp = kernel_timestamp;
1627         m->initrd_timestamp = initrd_timestamp;
1628
1629         manager_set_default_rlimits(m, arg_default_rlimit);
1630
1631         if (arg_default_controllers)
1632                 manager_set_default_controllers(m, arg_default_controllers);
1633
1634         manager_set_show_status(m, arg_show_status);
1635
1636         /* Remember whether we should queue the default job */
1637         queue_default_job = !serialization || arg_switched_root;
1638
1639         before_startup = now(CLOCK_MONOTONIC);
1640
1641         r = manager_startup(m, serialization, fds);
1642         if (r < 0)
1643                 log_error("Failed to fully start up daemon: %s", strerror(-r));
1644
1645         /* This will close all file descriptors that were opened, but
1646          * not claimed by any unit. */
1647         fdset_free(fds);
1648
1649         if (serialization) {
1650                 fclose(serialization);
1651                 serialization = NULL;
1652         }
1653
1654         if (queue_default_job) {
1655                 DBusError error;
1656                 Unit *target = NULL;
1657                 Job *default_unit_job;
1658
1659                 dbus_error_init(&error);
1660
1661                 log_debug("Activating default unit: %s", arg_default_unit);
1662
1663                 r = manager_load_unit(m, arg_default_unit, NULL, &error, &target);
1664                 if (r < 0) {
1665                         log_error("Failed to load default target: %s", bus_error(&error, r));
1666                         dbus_error_free(&error);
1667                 } else if (target->load_state == UNIT_ERROR)
1668                         log_error("Failed to load default target: %s", strerror(-target->load_error));
1669                 else if (target->load_state == UNIT_MASKED)
1670                         log_error("Default target masked.");
1671
1672                 if (!target || target->load_state != UNIT_LOADED) {
1673                         log_info("Trying to load rescue target...");
1674
1675                         r = manager_load_unit(m, SPECIAL_RESCUE_TARGET, NULL, &error, &target);
1676                         if (r < 0) {
1677                                 log_error("Failed to load rescue target: %s", bus_error(&error, r));
1678                                 dbus_error_free(&error);
1679                                 goto finish;
1680                         } else if (target->load_state == UNIT_ERROR) {
1681                                 log_error("Failed to load rescue target: %s", strerror(-target->load_error));
1682                                 goto finish;
1683                         } else if (target->load_state == UNIT_MASKED) {
1684                                 log_error("Rescue target masked.");
1685                                 goto finish;
1686                         }
1687                 }
1688
1689                 assert(target->load_state == UNIT_LOADED);
1690
1691                 if (arg_action == ACTION_TEST) {
1692                         printf("-> By units:\n");
1693                         manager_dump_units(m, stdout, "\t");
1694                 }
1695
1696                 r = manager_add_job(m, JOB_START, target, JOB_ISOLATE, false, &error, &default_unit_job);
1697                 if (r == -EPERM) {
1698                         log_debug("Default target could not be isolated, starting instead: %s", bus_error(&error, r));
1699                         dbus_error_free(&error);
1700
1701                         r = manager_add_job(m, JOB_START, target, JOB_REPLACE, false, &error, &default_unit_job);
1702                         if (r < 0) {
1703                                 log_error("Failed to start default target: %s", bus_error(&error, r));
1704                                 dbus_error_free(&error);
1705                                 goto finish;
1706                         }
1707                 } else if (r < 0) {
1708                         log_error("Failed to isolate default target: %s", bus_error(&error, r));
1709                         dbus_error_free(&error);
1710                         goto finish;
1711                 }
1712
1713                 m->default_unit_job_id = default_unit_job->id;
1714
1715                 after_startup = now(CLOCK_MONOTONIC);
1716                 log_full(arg_action == ACTION_TEST ? LOG_INFO : LOG_DEBUG,
1717                          "Loaded units and determined initial transaction in %s.",
1718                          format_timespan(timespan, sizeof(timespan), after_startup - before_startup, 0));
1719
1720                 if (arg_action == ACTION_TEST) {
1721                         printf("-> By jobs:\n");
1722                         manager_dump_jobs(m, stdout, "\t");
1723                         retval = EXIT_SUCCESS;
1724                         goto finish;
1725                 }
1726         }
1727
1728         for (;;) {
1729                 r = manager_loop(m);
1730                 if (r < 0) {
1731                         log_error("Failed to run mainloop: %s", strerror(-r));
1732                         goto finish;
1733                 }
1734
1735                 switch (m->exit_code) {
1736
1737                 case MANAGER_EXIT:
1738                         retval = EXIT_SUCCESS;
1739                         log_debug("Exit.");
1740                         goto finish;
1741
1742                 case MANAGER_RELOAD:
1743                         log_info("Reloading.");
1744                         r = manager_reload(m);
1745                         if (r < 0)
1746                                 log_error("Failed to reload: %s", strerror(-r));
1747                         break;
1748
1749                 case MANAGER_REEXECUTE:
1750
1751                         if (prepare_reexecute(m, &serialization, &fds, false) < 0)
1752                                 goto finish;
1753
1754                         reexecute = true;
1755                         log_notice("Reexecuting.");
1756                         goto finish;
1757
1758                 case MANAGER_SWITCH_ROOT:
1759                         /* Steal the switch root parameters */
1760                         switch_root_dir = m->switch_root;
1761                         switch_root_init = m->switch_root_init;
1762                         m->switch_root = m->switch_root_init = NULL;
1763
1764                         if (!switch_root_init)
1765                                 if (prepare_reexecute(m, &serialization, &fds, true) < 0)
1766                                         goto finish;
1767
1768                         reexecute = true;
1769                         log_notice("Switching root.");
1770                         goto finish;
1771
1772                 case MANAGER_REBOOT:
1773                 case MANAGER_POWEROFF:
1774                 case MANAGER_HALT:
1775                 case MANAGER_KEXEC: {
1776                         static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
1777                                 [MANAGER_REBOOT] = "reboot",
1778                                 [MANAGER_POWEROFF] = "poweroff",
1779                                 [MANAGER_HALT] = "halt",
1780                                 [MANAGER_KEXEC] = "kexec"
1781                         };
1782
1783                         assert_se(shutdown_verb = table[m->exit_code]);
1784                         arm_reboot_watchdog = m->exit_code == MANAGER_REBOOT;
1785
1786                         log_notice("Shutting down.");
1787                         goto finish;
1788                 }
1789
1790                 default:
1791                         assert_not_reached("Unknown exit code.");
1792                 }
1793         }
1794
1795 finish:
1796         if (m)
1797                 manager_free(m);
1798
1799         for (j = 0; j < RLIMIT_NLIMITS; j++)
1800                 free(arg_default_rlimit[j]);
1801
1802         free(arg_default_unit);
1803         strv_free(arg_default_controllers);
1804         free_join_controllers();
1805
1806         dbus_shutdown();
1807         label_finish();
1808
1809         if (reexecute) {
1810                 const char **args;
1811                 unsigned i, args_size;
1812
1813                 /* Close and disarm the watchdog, so that the new
1814                  * instance can reinitialize it, but doesn't get
1815                  * rebooted while we do that */
1816                 watchdog_close(true);
1817
1818                 /* Reset the RLIMIT_NOFILE to the kernel default, so
1819                  * that the new systemd can pass the kernel default to
1820                  * its child processes */
1821                 if (saved_rlimit_nofile.rlim_cur > 0)
1822                         setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
1823
1824                 if (switch_root_dir) {
1825                         /* Kill all remaining processes from the
1826                          * initrd, but don't wait for them, so that we
1827                          * can handle the SIGCHLD for them after
1828                          * deserializing. */
1829                         broadcast_signal(SIGTERM, false);
1830
1831                         /* And switch root */
1832                         r = switch_root(switch_root_dir);
1833                         if (r < 0)
1834                                 log_error("Failed to switch root, ignoring: %s", strerror(-r));
1835                 }
1836
1837                 args_size = MAX(6, argc+1);
1838                 args = newa(const char*, args_size);
1839
1840                 if (!switch_root_init) {
1841                         char sfd[16];
1842
1843                         /* First try to spawn ourselves with the right
1844                          * path, and with full serialization. We do
1845                          * this only if the user didn't specify an
1846                          * explicit init to spawn. */
1847
1848                         assert(serialization);
1849                         assert(fds);
1850
1851                         snprintf(sfd, sizeof(sfd), "%i", fileno(serialization));
1852                         char_array_0(sfd);
1853
1854                         i = 0;
1855                         args[i++] = SYSTEMD_BINARY_PATH;
1856                         if (switch_root_dir)
1857                                 args[i++] = "--switched-root";
1858                         args[i++] = arg_running_as == SYSTEMD_SYSTEM ? "--system" : "--user";
1859                         args[i++] = "--deserialize";
1860                         args[i++] = sfd;
1861                         args[i++] = NULL;
1862
1863                         assert(i <= args_size);
1864                         execv(args[0], (char* const*) args);
1865                 }
1866
1867                 /* Try the fallback, if there is any, without any
1868                  * serialization. We pass the original argv[] and
1869                  * envp[]. (Well, modulo the ordering changes due to
1870                  * getopt() in argv[], and some cleanups in envp[],
1871                  * but let's hope that doesn't matter.) */
1872
1873                 if (serialization) {
1874                         fclose(serialization);
1875                         serialization = NULL;
1876                 }
1877
1878                 if (fds) {
1879                         fdset_free(fds);
1880                         fds = NULL;
1881                 }
1882
1883                 /* Reopen the console */
1884                 make_console_stdio();
1885
1886                 for (j = 1, i = 1; j < argc; j++)
1887                         args[i++] = argv[j];
1888                 args[i++] = NULL;
1889                 assert(i <= args_size);
1890
1891                 if (switch_root_init) {
1892                         args[0] = switch_root_init;
1893                         execv(args[0], (char* const*) args);
1894                         log_warning("Failed to execute configured init, trying fallback: %m");
1895                 }
1896
1897                 args[0] = "/sbin/init";
1898                 execv(args[0], (char* const*) args);
1899
1900                 if (errno == ENOENT) {
1901                         log_warning("No /sbin/init, trying fallback");
1902
1903                         args[0] = "/bin/sh";
1904                         args[1] = NULL;
1905                         execv(args[0], (char* const*) args);
1906                         log_error("Failed to execute /bin/sh, giving up: %m");
1907                 } else
1908                         log_warning("Failed to execute /sbin/init, giving up: %m");
1909         }
1910
1911         if (serialization)
1912                 fclose(serialization);
1913
1914         if (fds)
1915                 fdset_free(fds);
1916
1917         if (shutdown_verb) {
1918                 const char * command_line[] = {
1919                         SYSTEMD_SHUTDOWN_BINARY_PATH,
1920                         shutdown_verb,
1921                         NULL
1922                 };
1923                 char **env_block;
1924
1925                 if (arm_reboot_watchdog && arg_shutdown_watchdog > 0) {
1926                         char e[32];
1927
1928                         /* If we reboot let's set the shutdown
1929                          * watchdog and tell the shutdown binary to
1930                          * repeatedly ping it */
1931                         watchdog_set_timeout(&arg_shutdown_watchdog);
1932                         watchdog_close(false);
1933
1934                         /* Tell the binary how often to ping */
1935                         snprintf(e, sizeof(e), "WATCHDOG_USEC=%llu", (unsigned long long) arg_shutdown_watchdog);
1936                         char_array_0(e);
1937
1938                         env_block = strv_append(environ, e);
1939                 } else {
1940                         env_block = strv_copy(environ);
1941                         watchdog_close(true);
1942                 }
1943
1944                 execve(SYSTEMD_SHUTDOWN_BINARY_PATH, (char **) command_line, env_block);
1945                 free(env_block);
1946                 log_error("Failed to execute shutdown binary, freezing: %m");
1947         }
1948
1949         if (getpid() == 1)
1950                 freeze();
1951
1952         return retval;
1953 }