chiark / gitweb /
Revert "socket: add support for TCP fast Open"
[elogind.git] / src / core / shutdown.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 ProFUSION embedded systems
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <sys/types.h>
24 #include <sys/reboot.h>
25 #include <linux/reboot.h>
26 #include <sys/wait.h>
27 #include <sys/stat.h>
28 #include <sys/mount.h>
29 #include <sys/syscall.h>
30 #include <fcntl.h>
31 #include <dirent.h>
32 #include <errno.h>
33 #include <unistd.h>
34 #include <signal.h>
35 #include <stdbool.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <getopt.h>
39
40 #include "missing.h"
41 #include "log.h"
42 #include "fileio.h"
43 #include "umount.h"
44 #include "util.h"
45 #include "mkdir.h"
46 #include "virt.h"
47 #include "watchdog.h"
48 #include "killall.h"
49 #include "cgroup-util.h"
50 #include "def.h"
51
/* Upper bound on the number of passes of the unmount/swapoff/detach
 * loop in main() before it gives up. */
#define FINALIZE_ATTEMPTS 50

/* The verb given on the command line; points into argv[] and is set
 * by parse_argv(). */
static char *arg_verb;
55
56 static int parse_argv(int argc, char *argv[]) {
57         enum {
58                 ARG_LOG_LEVEL = 0x100,
59                 ARG_LOG_TARGET,
60                 ARG_LOG_COLOR,
61                 ARG_LOG_LOCATION,
62         };
63
64         static const struct option options[] = {
65                 { "log-level",     required_argument, NULL, ARG_LOG_LEVEL    },
66                 { "log-target",    required_argument, NULL, ARG_LOG_TARGET   },
67                 { "log-color",     optional_argument, NULL, ARG_LOG_COLOR    },
68                 { "log-location",  optional_argument, NULL, ARG_LOG_LOCATION },
69                 {}
70         };
71
72         int c, r;
73
74         assert(argc >= 1);
75         assert(argv);
76
77         while ((c = getopt_long(argc, argv, "", options, NULL)) >= 0)
78                 switch (c) {
79
80                 case ARG_LOG_LEVEL:
81                         r = log_set_max_level_from_string(optarg);
82                         if (r < 0)
83                                 log_error("Failed to parse log level %s, ignoring.", optarg);
84
85                         break;
86
87                 case ARG_LOG_TARGET:
88                         r = log_set_target_from_string(optarg);
89                         if (r < 0)
90                                 log_error("Failed to parse log target %s, ignoring", optarg);
91
92                         break;
93
94                 case ARG_LOG_COLOR:
95
96                         if (optarg) {
97                                 r = log_show_color_from_string(optarg);
98                                 if (r < 0)
99                                         log_error("Failed to parse log color setting %s, ignoring", optarg);
100                         } else
101                                 log_show_color(true);
102
103                         break;
104
105                 case ARG_LOG_LOCATION:
106                         if (optarg) {
107                                 r = log_show_location_from_string(optarg);
108                                 if (r < 0)
109                                         log_error("Failed to parse log location setting %s, ignoring", optarg);
110                         } else
111                                 log_show_location(true);
112
113                         break;
114
115                 case '?':
116                         return -EINVAL;
117
118                 default:
119                         assert_not_reached("Unhandled option code.");
120                 }
121
122         if (optind >= argc) {
123                 log_error("Verb argument missing.");
124                 return -EINVAL;
125         }
126
127         arg_verb = argv[optind];
128
129         if (optind + 1 < argc)
130                 log_error("Excess arguments, ignoring");
131         return 0;
132 }
133
134 static int prepare_new_root(void) {
135         static const char dirs[] =
136                 "/run/initramfs/oldroot\0"
137                 "/run/initramfs/proc\0"
138                 "/run/initramfs/sys\0"
139                 "/run/initramfs/dev\0"
140                 "/run/initramfs/run\0";
141
142         const char *dir;
143
144         if (mount("/run/initramfs", "/run/initramfs", NULL, MS_BIND, NULL) < 0) {
145                 log_error("Failed to mount bind /run/initramfs on /run/initramfs: %m");
146                 return -errno;
147         }
148
149         if (mount(NULL, "/run/initramfs", NULL, MS_PRIVATE, NULL) < 0) {
150                 log_error("Failed to make /run/initramfs private mount: %m");
151                 return -errno;
152         }
153
154         NULSTR_FOREACH(dir, dirs)
155                 if (mkdir_p_label(dir, 0755) < 0 && errno != EEXIST) {
156                         log_error("Failed to mkdir %s: %m", dir);
157                         return -errno;
158                 }
159
160         if (mount("/sys", "/run/initramfs/sys", NULL, MS_BIND, NULL) < 0) {
161                 log_error("Failed to mount bind /sys on /run/initramfs/sys: %m");
162                 return -errno;
163         }
164
165         if (mount("/proc", "/run/initramfs/proc", NULL, MS_BIND, NULL) < 0) {
166                 log_error("Failed to mount bind /proc on /run/initramfs/proc: %m");
167                 return -errno;
168         }
169
170         if (mount("/dev", "/run/initramfs/dev", NULL, MS_BIND, NULL) < 0) {
171                 log_error("Failed to mount bind /dev on /run/initramfs/dev: %m");
172                 return -errno;
173         }
174
175         if (mount("/run", "/run/initramfs/run", NULL, MS_BIND, NULL) < 0) {
176                 log_error("Failed to mount bind /run on /run/initramfs/run: %m");
177                 return -errno;
178         }
179
180         return 0;
181 }
182
/* Switches the root directory to /run/initramfs.
 *
 * Changes into /run/initramfs, remounts "/" recursively as private,
 * pivots the root (the previous root ends up on "oldroot"), chroots
 * into the new root, starts a new session and reconnects stdio to the
 * console.
 *
 * Returns 0 on success, a negative errno-style value if chdir() or
 * pivot_root() fail. Failures of the private remount and of chroot()
 * are only logged as warnings. */
static int pivot_to_new_root(void) {
        int r;

        if (chdir("/run/initramfs") < 0) {
                /* Capture the error code before logging, so that the
                 * return value does not depend on the logger leaving
                 * errno alone */
                r = -errno;
                log_error("Failed to change directory to /run/initramfs: %m");
                return r;
        }

        /* Work-around for a kernel bug: for some reason the kernel
         * refuses switching root if any file systems are mounted
         * MS_SHARED. Hence remount them MS_PRIVATE here as a
         * work-around.
         *
         * https://bugzilla.redhat.com/show_bug.cgi?id=847418 */
        if (mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL) < 0)
                log_warning("Failed to make \"/\" private mount: %m");

        /* Only chroot if pivot_root() succeeded */
        if (pivot_root(".", "oldroot") < 0) {
                r = -errno;
                log_error("pivot failed: %m");
                return r;
        }

        /* Do not drop a chroot() failure silently, but do not abort
         * the switch because of it either: the root has already been
         * pivoted at this point. */
        if (chroot(".") < 0)
                log_warning("Failed to chroot into new root, continuing anyway: %m");

        setsid();
        make_console_stdio();

        log_info("Successfully changed into root pivot.");

        return 0;
}
214
/* Entry point of the shutdown binary.
 *
 * Sequence, in order:
 *   1. parse the command line and map the verb (reboot, poweroff,
 *      halt, kexec) to a reboot() command;
 *   2. refuse to continue unless running as PID 1;
 *   3. broadcast SIGTERM, then SIGKILL, to the remaining processes;
 *   4. repeatedly try to unmount file systems, deactivate swaps and
 *      detach loop and DM devices, up to FINALIZE_ATTEMPTS passes;
 *   5. run the executables in SYSTEM_SHUTDOWN_PATH;
 *   6. if /run/initramfs/shutdown is executable (and we are neither in
 *      a container nor in an initrd), pivot into /run/initramfs and
 *      execute /shutdown there;
 *   7. otherwise sync and invoke reboot().
 *
 * Every failure path ends at the "error" label, which logs the error
 * and calls freeze(). */
int main(int argc, char *argv[]) {
        bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
        bool in_container, use_watchdog = false;
        _cleanup_free_ char *cgroup = NULL;
        char *arguments[3];
        unsigned retries;
        int cmd, r;

        log_parse_environment();
        r = parse_argv(argc, argv);
        if (r < 0)
                goto error;

        /* journald will die if not gone yet. The log target defaults
         * to console, but may have been changed by commandline options. */

        log_close_console(); /* force reopen of /dev/console */
        log_open();

        umask(0022);

        if (getpid() != 1) {
                log_error("Not executed by init (PID 1).");
                r = -EPERM;
                goto error;
        }

        /* Translate the verb into the command later passed to reboot() */
        if (streq(arg_verb, "reboot"))
                cmd = RB_AUTOBOOT;
        else if (streq(arg_verb, "poweroff"))
                cmd = RB_POWER_OFF;
        else if (streq(arg_verb, "halt"))
                cmd = RB_HALT_SYSTEM;
        else if (streq(arg_verb, "kexec"))
                cmd = LINUX_REBOOT_CMD_KEXEC;
        else {
                r = -EINVAL;
                log_error("Unknown action '%s'.", arg_verb);
                goto error;
        }

        /* The result is not checked; if cgroup stays NULL the
         * trimming in the loop below is simply skipped */
        cg_get_root_path(&cgroup);

        /* Ping the watchdog in the loop below only if WATCHDOG_USEC
         * is set in the environment */
        use_watchdog = !!getenv("WATCHDOG_USEC");

        /* lock us into memory */
        mlockall(MCL_CURRENT|MCL_FUTURE);

        log_info("Sending SIGTERM to remaining processes...");
        broadcast_signal(SIGTERM, true, true);

        log_info("Sending SIGKILL to remaining processes...");
        broadcast_signal(SIGKILL, true, false);

        in_container = detect_container(NULL) > 0;

        /* Inside a container only file systems are finalized; swaps,
         * loop devices and DM devices are left alone */
        need_umount = true;
        need_swapoff = !in_container;
        need_loop_detach = !in_container;
        need_dm_detach = !in_container;

        /* Unmount all mountpoints, swaps, and loopback devices */
        for (retries = 0; retries < FINALIZE_ATTEMPTS; retries++) {
                /* Handed to each of the *_all() calls of this pass;
                 * if it is still false at the end, we give up */
                bool changed = false;

                if (use_watchdog)
                        watchdog_ping();

                /* Let's trim the cgroup tree on each iteration so
                   that we leave an empty cgroup tree around, so that
                   container managers get a nice notify event when we
                   are down */
                if (cgroup)
                        cg_trim(SYSTEMD_CGROUP_CONTROLLER, cgroup, false);

                /* Each of the four steps below treats the return value
                 * the same way: 0 means done, a positive value is
                 * logged as the number of items left, a negative value
                 * is logged as an error code. */
                if (need_umount) {
                        log_info("Unmounting file systems.");
                        r = umount_all(&changed);
                        if (r == 0) {
                                need_umount = false;
                                log_info("All filesystems unmounted.");
                        } else if (r > 0)
                                log_info("Not all file systems unmounted, %d left.", r);
                        else
                                log_error("Failed to unmount file systems: %s", strerror(-r));
                }

                if (need_swapoff) {
                        log_info("Deactivating swaps.");
                        r = swapoff_all(&changed);
                        if (r == 0) {
                                need_swapoff = false;
                                log_info("All swaps deactivated.");
                        } else if (r > 0)
                                log_info("Not all swaps deactivated, %d left.", r);
                        else
                                log_error("Failed to deactivate swaps: %s", strerror(-r));
                }

                if (need_loop_detach) {
                        log_info("Detaching loop devices.");
                        r = loopback_detach_all(&changed);
                        if (r == 0) {
                                need_loop_detach = false;
                                log_info("All loop devices detached.");
                        } else if (r > 0)
                                log_info("Not all loop devices detached, %d left.", r);
                        else
                                log_error("Failed to detach loop devices: %s", strerror(-r));
                }

                if (need_dm_detach) {
                        log_info("Detaching DM devices.");
                        r = dm_detach_all(&changed);
                        if (r == 0) {
                                need_dm_detach = false;
                                log_info("All DM devices detached.");
                        } else if (r > 0)
                                log_info("Not all DM devices detached, %d left.", r);
                        else
                                log_error("Failed to detach DM devices: %s", strerror(-r));
                }

                if (!need_umount && !need_swapoff && !need_loop_detach && !need_dm_detach) {
                        if (retries > 0)
                                log_info("All filesystems, swaps, loop devices, DM devices detached.");
                        /* Yay, done */
                        goto initrd_jump;
                }

                /* If in this iteration we didn't manage to
                 * unmount/deactivate anything, we simply give up */
                if (!changed) {
                        log_info("Cannot finalize remaining%s%s%s%s continuing.",
                                 need_umount ? " file systems," : "",
                                 need_swapoff ? " swap devices," : "",
                                 need_loop_detach ? " loop devices," : "",
                                 need_dm_detach ? " DM devices," : "");
                        goto initrd_jump;
                }

                log_debug("After %u retries, couldn't finalize remaining %s%s%s%s trying again.",
                          retries + 1,
                          need_umount ? " file systems," : "",
                          need_swapoff ? " swap devices," : "",
                          need_loop_detach ? " loop devices," : "",
                          need_dm_detach ? " DM devices," : "");
        }

        /* Only reached when the loop ran out of attempts; both other
         * exits jump straight to initrd_jump */
        log_error("Too many iterations, giving up.");

 initrd_jump:

        /* arguments[0] is left NULL here and is only set to
         * "/shutdown" for the execv() below; the verb goes in slot 1 */
        arguments[0] = NULL;
        arguments[1] = arg_verb;
        arguments[2] = NULL;
        execute_directory(SYSTEM_SHUTDOWN_PATH, NULL, DEFAULT_TIMEOUT_USEC, arguments);

        if (!in_container && !in_initrd() &&
            access("/run/initramfs/shutdown", X_OK) == 0) {

                if (prepare_new_root() >= 0 &&
                    pivot_to_new_root() >= 0) {
                        arguments[0] = (char*) "/shutdown";

                        log_info("Returning to initrd...");

                        /* If execv() returns it failed; we then carry
                         * on with the regular reboot path below */
                        execv("/shutdown", arguments);
                        log_error("Failed to execute shutdown binary: %m");
                }
        }

        if (need_umount || need_swapoff || need_loop_detach || need_dm_detach)
                log_error("Failed to finalize %s%s%s%s ignoring",
                          need_umount ? " file systems," : "",
                          need_swapoff ? " swap devices," : "",
                          need_loop_detach ? " loop devices," : "",
                          need_dm_detach ? " DM devices," : "");

        /* The kernel will automatically flush ATA disks and suchlike
         * on reboot(), but the file systems need to be synced
         * explicitly in advance. So let's do this here, but not
         * needlessly slow down containers. */
        if (!in_container)
                sync();

        switch (cmd) {

        case LINUX_REBOOT_CMD_KEXEC:

                if (!in_container) {
                        /* We cheat and exec kexec to avoid doing all its work */
                        pid_t pid;

                        log_info("Rebooting with kexec.");

                        pid = fork();
                        if (pid < 0)
                                log_error("Failed to fork: %m");
                        else if (pid == 0) {

                                const char * const args[] = {
                                        KEXEC, "-e", NULL
                                };

                                /* Child */

                                execv(args[0], (char * const *) args);
                                _exit(EXIT_FAILURE);
                        } else
                                wait_for_terminate_and_warn("kexec", pid);
                }

                /* Still here: either we are in a container or the
                 * kexec attempt above did not take over, so continue
                 * as a regular reboot */
                cmd = RB_AUTOBOOT;
                /* Fall through */

        case RB_AUTOBOOT:

                if (!in_container) {
                        _cleanup_free_ char *param = NULL;

                        /* If a reboot argument can be read from
                         * REBOOT_PARAM_FILE, try rebooting with it
                         * first; if that call returns, the plain
                         * reboot(cmd) below is used instead */
                        if (read_one_line_file(REBOOT_PARAM_FILE, &param) >= 0) {
                                log_info("Rebooting with argument '%s'.", param);
                                syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
                                        LINUX_REBOOT_CMD_RESTART2, param);
                        }
                }

                log_info("Rebooting.");
                break;

        case RB_POWER_OFF:
                log_info("Powering off.");
                break;

        case RB_HALT_SYSTEM:
                log_info("Halting system.");
                break;

        default:
                assert_not_reached("Unknown magic");
        }

        /* Anything after this call only runs if reboot() returned */
        reboot(cmd);
        if (errno == EPERM && in_container) {
                /* If we are in a container, and we lacked
                 * CAP_SYS_BOOT just exit, this will kill our
                 * container for good. */
                log_info("Exiting container.");
                exit(0);
        }

        log_error("Failed to invoke reboot(): %m");
        r = -errno;

  error:
        log_error("Critical error while doing system shutdown: %s", strerror(-r));

        freeze();
}