chiark / gitweb /
c59f7e2daa63b3c50bfc8ecb081a2e9be9fb62ba
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40
41 #ifdef HAVE_PAM
42 #include <security/pam_appl.h>
43 #endif
44
45 #include "execute.h"
46 #include "strv.h"
47 #include "macro.h"
48 #include "capability.h"
49 #include "util.h"
50 #include "log.h"
51 #include "ioprio.h"
52 #include "securebits.h"
53 #include "cgroup.h"
54 #include "namespace.h"
55 #include "tcpwrap.h"
56 #include "exit-status.h"
57 #include "missing.h"
58 #include "utmp-wtmp.h"
59 #include "def.h"
60 #include "loopback-setup.h"
61
/* Access mode chown_terminal() puts on a terminal: read/write for the
 * owner, write-only for the group, nothing for everybody else.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620
64
/* Renumbers the passed descriptors so that fds[i] ends up being
 * descriptor i+3, i.e. the array occupies 3, 4, 5, ... in order.
 *
 * The fds array is modified in place: every entry that had to be
 * moved is replaced by its duplicate and the old descriptor is
 * closed.
 *
 * Returns 0 on success, or a negative errno if duplicating failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits
                         * in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd that
                         * is >= wanted. If the slot was still taken
                         * we got something else, so remember the
                         * first such index and take another pass
                         * starting there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
108
/* Prepares the passed descriptors for being handed to a child:
 * O_NONBLOCK is set or cleared according to 'nonblock', and
 * FD_CLOEXEC is always cleared.
 *
 * Returns 0 on success, or the negative error of the first helper
 * call that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds
                 * are meant to survive into our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
135
136 static const char *tty_path(const ExecContext *context) {
137         assert(context);
138
139         if (context->tty_path)
140                 return context->tty_path;
141
142         return "/dev/console";
143 }
144
145 void exec_context_tty_reset(const ExecContext *context) {
146         assert(context);
147
148         if (context->tty_vhangup)
149                 terminal_vhangup(tty_path(context));
150
151         if (context->tty_reset)
152                 reset_terminal(tty_path(context));
153
154         if (context->tty_vt_disallocate && context->tty_path)
155                 vt_disallocate(context->tty_path);
156 }
157
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and
 * makes it available as descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: open() already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(null_fd);

        return r;
}
174
175 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
176         int fd, r;
177         union sockaddr_union sa;
178
179         assert(context);
180         assert(output < _EXEC_OUTPUT_MAX);
181         assert(ident);
182         assert(nfd >= 0);
183
184         fd = socket(AF_UNIX, SOCK_STREAM, 0);
185         if (fd < 0)
186                 return -errno;
187
188         zero(sa);
189         sa.un.sun_family = AF_UNIX;
190         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
191
192         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
193         if (r < 0) {
194                 close_nointr_nofail(fd);
195                 return -errno;
196         }
197
198         if (shutdown(fd, SHUT_RD) < 0) {
199                 close_nointr_nofail(fd);
200                 return -errno;
201         }
202
203         dprintf(fd,
204                 "%s\n"
205                 "%i\n"
206                 "%i\n"
207                 "%i\n"
208                 "%i\n"
209                 "%i\n",
210                 context->syslog_identifier ? context->syslog_identifier : ident,
211                 context->syslog_priority,
212                 !!context->syslog_level_prefix,
213                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
214                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
215                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
216
217         if (fd != nfd) {
218                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
219                 close_nointr_nofail(fd);
220         } else
221                 r = nfd;
222
223         return r;
224 }
/* Opens the terminal at 'path' via open_terminal(), passing 'mode'
 * with O_NOCTTY or'ed in, and makes it available as descriptor
 * 'nfd'.
 *
 * Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(term_fd);

        return r;
}
242
243 static bool is_terminal_input(ExecInput i) {
244         return
245                 i == EXEC_INPUT_TTY ||
246                 i == EXEC_INPUT_TTY_FORCE ||
247                 i == EXEC_INPUT_TTY_FAIL;
248 }
249
250 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
251
252         if (is_terminal_input(std_input) && !apply_tty_stdin)
253                 return EXEC_INPUT_NULL;
254
255         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
256                 return EXEC_INPUT_NULL;
257
258         return std_input;
259 }
260
261 static int fixup_output(ExecOutput std_output, int socket_fd) {
262
263         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
264                 return EXEC_OUTPUT_INHERIT;
265
266         return std_output;
267 }
268
269 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
270         ExecInput i;
271
272         assert(context);
273
274         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
275
276         switch (i) {
277
278         case EXEC_INPUT_NULL:
279                 return open_null_as(O_RDONLY, STDIN_FILENO);
280
281         case EXEC_INPUT_TTY:
282         case EXEC_INPUT_TTY_FORCE:
283         case EXEC_INPUT_TTY_FAIL: {
284                 int fd, r;
285
286                 if ((fd = acquire_terminal(
287                                      tty_path(context),
288                                      i == EXEC_INPUT_TTY_FAIL,
289                                      i == EXEC_INPUT_TTY_FORCE,
290                                      false)) < 0)
291                         return fd;
292
293                 if (fd != STDIN_FILENO) {
294                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
295                         close_nointr_nofail(fd);
296                 } else
297                         r = STDIN_FILENO;
298
299                 return r;
300         }
301
302         case EXEC_INPUT_SOCKET:
303                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
304
305         default:
306                 assert_not_reached("Unknown input type");
307         }
308 }
309
310 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
311         ExecOutput o;
312         ExecInput i;
313
314         assert(context);
315         assert(ident);
316
317         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
318         o = fixup_output(context->std_output, socket_fd);
319
320         /* This expects the input is already set up */
321
322         switch (o) {
323
324         case EXEC_OUTPUT_INHERIT:
325
326                 /* If input got downgraded, inherit the original value */
327                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
328                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
329
330                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
331                 if (i != EXEC_INPUT_NULL)
332                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
333
334                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
335                 if (getppid() != 1)
336                         return STDOUT_FILENO;
337
338                 /* We need to open /dev/null here anew, to get the
339                  * right access mode. So we fall through */
340
341         case EXEC_OUTPUT_NULL:
342                 return open_null_as(O_WRONLY, STDOUT_FILENO);
343
344         case EXEC_OUTPUT_TTY:
345                 if (is_terminal_input(i))
346                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
347
348                 /* We don't reset the terminal if this is just about output */
349                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
350
351         case EXEC_OUTPUT_SYSLOG:
352         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
353         case EXEC_OUTPUT_KMSG:
354         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
355         case EXEC_OUTPUT_JOURNAL:
356         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
357                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
358
359         case EXEC_OUTPUT_SOCKET:
360                 assert(socket_fd >= 0);
361                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
362
363         default:
364                 assert_not_reached("Unknown output type");
365         }
366 }
367
368 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
369         ExecOutput o, e;
370         ExecInput i;
371
372         assert(context);
373         assert(ident);
374
375         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
376         o = fixup_output(context->std_output, socket_fd);
377         e = fixup_output(context->std_error, socket_fd);
378
379         /* This expects the input and output are already set up */
380
381         /* Don't change the stderr file descriptor if we inherit all
382          * the way and are not on a tty */
383         if (e == EXEC_OUTPUT_INHERIT &&
384             o == EXEC_OUTPUT_INHERIT &&
385             i == EXEC_INPUT_NULL &&
386             !is_terminal_input(context->std_input) &&
387             getppid () != 1)
388                 return STDERR_FILENO;
389
390         /* Duplicate from stdout if possible */
391         if (e == o || e == EXEC_OUTPUT_INHERIT)
392                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
393
394         switch (e) {
395
396         case EXEC_OUTPUT_NULL:
397                 return open_null_as(O_WRONLY, STDERR_FILENO);
398
399         case EXEC_OUTPUT_TTY:
400                 if (is_terminal_input(i))
401                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
402
403                 /* We don't reset the terminal if this is just about output */
404                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
405
406         case EXEC_OUTPUT_SYSLOG:
407         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
408         case EXEC_OUTPUT_KMSG:
409         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
410         case EXEC_OUTPUT_JOURNAL:
411         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
412                 return connect_logger_as(context, e, ident, STDERR_FILENO);
413
414         case EXEC_OUTPUT_SOCKET:
415                 assert(socket_fd >= 0);
416                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
417
418         default:
419                 assert_not_reached("Unknown error type");
420         }
421 }
422
423 static int chown_terminal(int fd, uid_t uid) {
424         struct stat st;
425
426         assert(fd >= 0);
427
428         /* This might fail. What matters are the results. */
429         (void) fchown(fd, uid, -1);
430         (void) fchmod(fd, TTY_MODE);
431
432         if (fstat(fd, &st) < 0)
433                 return -errno;
434
435         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
436                 return -EPERM;
437
438         return 0;
439 }
440
441 static int setup_confirm_stdio(const ExecContext *context,
442                                int *_saved_stdin,
443                                int *_saved_stdout) {
444         int fd = -1, saved_stdin, saved_stdout = -1, r;
445
446         assert(context);
447         assert(_saved_stdin);
448         assert(_saved_stdout);
449
450         /* This returns positive EXIT_xxx return values instead of
451          * negative errno style values! */
452
453         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
454                 return EXIT_STDIN;
455
456         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
457                 r = EXIT_STDOUT;
458                 goto fail;
459         }
460
461         if ((fd = acquire_terminal(
462                              tty_path(context),
463                              context->std_input == EXEC_INPUT_TTY_FAIL,
464                              context->std_input == EXEC_INPUT_TTY_FORCE,
465                              false)) < 0) {
466                 r = EXIT_STDIN;
467                 goto fail;
468         }
469
470         if (chown_terminal(fd, getuid()) < 0) {
471                 r = EXIT_STDIN;
472                 goto fail;
473         }
474
475         if (dup2(fd, STDIN_FILENO) < 0) {
476                 r = EXIT_STDIN;
477                 goto fail;
478         }
479
480         if (dup2(fd, STDOUT_FILENO) < 0) {
481                 r = EXIT_STDOUT;
482                 goto fail;
483         }
484
485         if (fd >= 2)
486                 close_nointr_nofail(fd);
487
488         *_saved_stdin = saved_stdin;
489         *_saved_stdout = saved_stdout;
490
491         return 0;
492
493 fail:
494         if (saved_stdout >= 0)
495                 close_nointr_nofail(saved_stdout);
496
497         if (saved_stdin >= 0)
498                 close_nointr_nofail(saved_stdin);
499
500         if (fd >= 0)
501                 close_nointr_nofail(fd);
502
503         return r;
504 }
505
506 static int restore_confirm_stdio(const ExecContext *context,
507                                  int *saved_stdin,
508                                  int *saved_stdout,
509                                  bool *keep_stdin,
510                                  bool *keep_stdout) {
511
512         assert(context);
513         assert(saved_stdin);
514         assert(*saved_stdin >= 0);
515         assert(saved_stdout);
516         assert(*saved_stdout >= 0);
517
518         /* This returns positive EXIT_xxx return values instead of
519          * negative errno style values! */
520
521         if (is_terminal_input(context->std_input)) {
522
523                 /* The service wants terminal input. */
524
525                 *keep_stdin = true;
526                 *keep_stdout =
527                         context->std_output == EXEC_OUTPUT_INHERIT ||
528                         context->std_output == EXEC_OUTPUT_TTY;
529
530         } else {
531                 /* If the service doesn't want a controlling terminal,
532                  * then we need to get rid entirely of what we have
533                  * already. */
534
535                 if (release_terminal() < 0)
536                         return EXIT_STDIN;
537
538                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
539                         return EXIT_STDIN;
540
541                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
542                         return EXIT_STDOUT;
543
544                 *keep_stdout = *keep_stdin = false;
545         }
546
547         return 0;
548 }
549
550 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
551         bool keep_groups = false;
552         int r;
553
554         assert(context);
555
556         /* Lookup and set GID and supplementary group list. Here too
557          * we avoid NSS lookups for gid=0. */
558
559         if (context->group || username) {
560
561                 if (context->group) {
562                         const char *g = context->group;
563
564                         if ((r = get_group_creds(&g, &gid)) < 0)
565                                 return r;
566                 }
567
568                 /* First step, initialize groups from /etc/groups */
569                 if (username && gid != 0) {
570                         if (initgroups(username, gid) < 0)
571                                 return -errno;
572
573                         keep_groups = true;
574                 }
575
576                 /* Second step, set our gids */
577                 if (setresgid(gid, gid, gid) < 0)
578                         return -errno;
579         }
580
581         if (context->supplementary_groups) {
582                 int ngroups_max, k;
583                 gid_t *gids;
584                 char **i;
585
586                 /* Final step, initialize any manually set supplementary groups */
587                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
588
589                 if (!(gids = new(gid_t, ngroups_max)))
590                         return -ENOMEM;
591
592                 if (keep_groups) {
593                         if ((k = getgroups(ngroups_max, gids)) < 0) {
594                                 free(gids);
595                                 return -errno;
596                         }
597                 } else
598                         k = 0;
599
600                 STRV_FOREACH(i, context->supplementary_groups) {
601                         const char *g;
602
603                         if (k >= ngroups_max) {
604                                 free(gids);
605                                 return -E2BIG;
606                         }
607
608                         g = *i;
609                         r = get_group_creds(&g, gids+k);
610                         if (r < 0) {
611                                 free(gids);
612                                 return r;
613                         }
614
615                         k++;
616                 }
617
618                 if (setgroups(k, gids) < 0) {
619                         free(gids);
620                         return -errno;
621                 }
622
623                 free(gids);
624         }
625
626         return 0;
627 }
628
629 static int enforce_user(const ExecContext *context, uid_t uid) {
630         int r;
631         assert(context);
632
633         /* Sets (but doesn't lookup) the uid and make sure we keep the
634          * capabilities while doing so. */
635
636         if (context->capabilities) {
637                 cap_t d;
638                 static const cap_value_t bits[] = {
639                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
640                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
641                 };
642
643                 /* First step: If we need to keep capabilities but
644                  * drop privileges we need to make sure we keep our
645                  * caps, whiel we drop privileges. */
646                 if (uid != 0) {
647                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
648
649                         if (prctl(PR_GET_SECUREBITS) != sb)
650                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
651                                         return -errno;
652                 }
653
654                 /* Second step: set the capabilities. This will reduce
655                  * the capabilities to the minimum we need. */
656
657                 if (!(d = cap_dup(context->capabilities)))
658                         return -errno;
659
660                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
661                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
662                         r = -errno;
663                         cap_free(d);
664                         return r;
665                 }
666
667                 if (cap_set_proc(d) < 0) {
668                         r = -errno;
669                         cap_free(d);
670                         return r;
671                 }
672
673                 cap_free(d);
674         }
675
676         /* Third step: actually set the uids */
677         if (setresuid(uid, uid, uid) < 0)
678                 return -errno;
679
680         /* At this point we should have all necessary capabilities but
681            are otherwise a normal user. However, the caps might got
682            corrupted due to the setresuid() so we need clean them up
683            later. This is done outside of this call. */
684
685         return 0;
686 }
687
688 #ifdef HAVE_PAM
689
690 static int null_conv(
691                 int num_msg,
692                 const struct pam_message **msg,
693                 struct pam_response **resp,
694                 void *appdata_ptr) {
695
696         /* We don't support conversations */
697
698         return PAM_CONV_ERR;
699 }
700
701 static int setup_pam(
702                 const char *name,
703                 const char *user,
704                 const char *tty,
705                 char ***pam_env,
706                 int fds[], unsigned n_fds) {
707
708         static const struct pam_conv conv = {
709                 .conv = null_conv,
710                 .appdata_ptr = NULL
711         };
712
713         pam_handle_t *handle = NULL;
714         sigset_t ss, old_ss;
715         int pam_code = PAM_SUCCESS;
716         int err;
717         char **e = NULL;
718         bool close_session = false;
719         pid_t pam_pid = 0, parent_pid;
720
721         assert(name);
722         assert(user);
723         assert(pam_env);
724
725         /* We set up PAM in the parent process, then fork. The child
726          * will then stay around until killed via PR_GET_PDEATHSIG or
727          * systemd via the cgroup logic. It will then remove the PAM
728          * session again. The parent process will exec() the actual
729          * daemon. We do things this way to ensure that the main PID
730          * of the daemon is the one we initially fork()ed. */
731
732         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
733                 handle = NULL;
734                 goto fail;
735         }
736
737         if (tty)
738                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
739                         goto fail;
740
741         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
742                 goto fail;
743
744         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
745                 goto fail;
746
747         close_session = true;
748
749         if ((!(e = pam_getenvlist(handle)))) {
750                 pam_code = PAM_BUF_ERR;
751                 goto fail;
752         }
753
754         /* Block SIGTERM, so that we know that it won't get lost in
755          * the child */
756         if (sigemptyset(&ss) < 0 ||
757             sigaddset(&ss, SIGTERM) < 0 ||
758             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
759                 goto fail;
760
761         parent_pid = getpid();
762
763         if ((pam_pid = fork()) < 0)
764                 goto fail;
765
766         if (pam_pid == 0) {
767                 int sig;
768                 int r = EXIT_PAM;
769
770                 /* The child's job is to reset the PAM session on
771                  * termination */
772
773                 /* This string must fit in 10 chars (i.e. the length
774                  * of "/sbin/init"), to look pretty in /bin/ps */
775                 rename_process("(sd-pam)");
776
777                 /* Make sure we don't keep open the passed fds in this
778                 child. We assume that otherwise only those fds are
779                 open here that have been opened by PAM. */
780                 close_many(fds, n_fds);
781
782                 /* Wait until our parent died. This will most likely
783                  * not work since the kernel does not allow
784                  * unprivileged parents kill their privileged children
785                  * this way. We rely on the control groups kill logic
786                  * to do the rest for us. */
787                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
788                         goto child_finish;
789
790                 /* Check if our parent process might already have
791                  * died? */
792                 if (getppid() == parent_pid) {
793                         for (;;) {
794                                 if (sigwait(&ss, &sig) < 0) {
795                                         if (errno == EINTR)
796                                                 continue;
797
798                                         goto child_finish;
799                                 }
800
801                                 assert(sig == SIGTERM);
802                                 break;
803                         }
804                 }
805
806                 /* If our parent died we'll end the session */
807                 if (getppid() != parent_pid)
808                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
809                                 goto child_finish;
810
811                 r = 0;
812
813         child_finish:
814                 pam_end(handle, pam_code | PAM_DATA_SILENT);
815                 _exit(r);
816         }
817
818         /* If the child was forked off successfully it will do all the
819          * cleanups, so forget about the handle here. */
820         handle = NULL;
821
822         /* Unblock SIGTERM again in the parent */
823         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
824                 goto fail;
825
826         /* We close the log explicitly here, since the PAM modules
827          * might have opened it, but we don't want this fd around. */
828         closelog();
829
830         *pam_env = e;
831         e = NULL;
832
833         return 0;
834
835 fail:
836         if (pam_code != PAM_SUCCESS)
837                 err = -EPERM;  /* PAM errors do not map to errno */
838         else
839                 err = -errno;
840
841         if (handle) {
842                 if (close_session)
843                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
844
845                 pam_end(handle, pam_code | PAM_DATA_SILENT);
846         }
847
848         strv_free(e);
849
850         closelog();
851
852         if (pam_pid > 1) {
853                 kill(pam_pid, SIGTERM);
854                 kill(pam_pid, SIGCONT);
855         }
856
857         return err;
858 }
859 #endif
860
861 static int do_capability_bounding_set_drop(uint64_t drop) {
862         unsigned long i;
863         cap_t old_cap = NULL, new_cap = NULL;
864         cap_flag_value_t fv;
865         int r;
866
867         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
868          * in the effective set (yes, the kernel drops that when
869          * executing init!), so get it back temporarily so that we can
870          * call PR_CAPBSET_DROP. */
871
872         old_cap = cap_get_proc();
873         if (!old_cap)
874                 return -errno;
875
876         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
877                 r = -errno;
878                 goto finish;
879         }
880
881         if (fv != CAP_SET) {
882                 static const cap_value_t v = CAP_SETPCAP;
883
884                 new_cap = cap_dup(old_cap);
885                 if (!new_cap) {
886                         r = -errno;
887                         goto finish;
888                 }
889
890                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
891                         r = -errno;
892                         goto finish;
893                 }
894
895                 if (cap_set_proc(new_cap) < 0) {
896                         r = -errno;
897                         goto finish;
898                 }
899         }
900
901         for (i = 0; i <= cap_last_cap(); i++)
902                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
903                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
904                                 r = -errno;
905                                 goto finish;
906                         }
907                 }
908
909         r = 0;
910
911 finish:
912         if (new_cap)
913                 cap_free(new_cap);
914
915         if (old_cap) {
916                 cap_set_proc(old_cap);
917                 cap_free(old_cap);
918         }
919
920         return r;
921 }
922
/* Renames the process to the file name component of 'path', put in
 * parentheses. Only the last 8 characters of the name are used, so
 * that the result fits in 10 chars (i.e. the length of
 * "/sbin/init") and looks pretty in /bin/ps. If there is no file
 * name, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        char *out = process_name;
        const char *base;
        size_t len;

        base = file_name_from_path(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting
                 * part, since the beginning might just be
                 * "systemd-" */
                base += len - 8;
                len = 8;
        }

        *out++ = '(';
        memcpy(out, base, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
953
/* Forks off a child process which configures itself as described by
 * 'context' and finally execve()s command->path.
 *
 * command            The command to run; command->path is executed and
 *                    command->exec_status is handed to exec_status_start()
 *                    in the parent after a successful fork.
 * argv               Argument vector; if NULL, command->argv is used.
 * context            Execution settings applied in the child.
 * fds, n_fds         File descriptors handed to the child. If stdin, stdout
 *                    or stderr is configured as a socket, exactly one fd
 *                    must be passed (otherwise -EINVAL is returned) and it is
 *                    used as that socket. If n_fds is non-zero after that,
 *                    LISTEN_PID and LISTEN_FDS are added to the environment.
 * environment        Merged with the generated variables,
 *                    context->environment, the loaded environment files and
 *                    the PAM environment to form the child's environment.
 * apply_permissions  Gates group/user enforcement, resource limits, the
 *                    capability bounding set, securebits and capabilities.
 * apply_chroot       If true, chroot() into context->root_directory (when
 *                    set) and chdir() to context->working_directory or "/";
 *                    if false, only chdir() to the two paths joined by "/".
 * apply_tty_stdin    Passed on to setup_input()/setup_output()/setup_error();
 *                    also decides whether the confirmation step runs when
 *                    stdin is a terminal input.
 * confirm_spawn      If true, interactively ask before executing.
 * cgroup_bondings, cgroup_attributes, cgroup_suffix
 *                    The bondings are realized and the attributes applied in
 *                    the parent before forking; the process is then installed
 *                    into the bondings from both the child and the parent.
 * ret                Receives the PID of the child on success.
 *
 * Returns 0 on success and a negative errno-style value if anything fails in
 * the parent. The child never returns from this function: it either execve()s
 * or terminates via _exit() with the EXIT_* code of the step that failed. */
954 int exec_spawn(ExecCommand *command,
955                char **argv,
956                const ExecContext *context,
957                int fds[], unsigned n_fds,
958                char **environment,
959                bool apply_permissions,
960                bool apply_chroot,
961                bool apply_tty_stdin,
962                bool confirm_spawn,
963                CGroupBonding *cgroup_bondings,
964                CGroupAttribute *cgroup_attributes,
965                const char *cgroup_suffix,
966                pid_t *ret) {
967
968         pid_t pid;
969         int r;
970         char *line;
971         int socket_fd;
972         char **files_env = NULL;
973
974         assert(command);
975         assert(context);
976         assert(ret);
977         assert(fds || n_fds <= 0);
978
        /* In socket mode the single passed fd becomes socket_fd and is no
         * longer treated as part of the fds[] array. */
979         if (context->std_input == EXEC_INPUT_SOCKET ||
980             context->std_output == EXEC_OUTPUT_SOCKET ||
981             context->std_error == EXEC_OUTPUT_SOCKET) {
982
983                 if (n_fds != 1)
984                         return -EINVAL;
985
986                 socket_fd = fds[0];
987
988                 fds = NULL;
989                 n_fds = 0;
990         } else
991                 socket_fd = -1;
992
993         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
994                 log_error("Failed to load environment files: %s", strerror(-r));
995                 return r;
996         }
997
998         if (!argv)
999                 argv = command->argv;
1000
1001         if (!(line = exec_command_line(argv))) {
1002                 r = -ENOMEM;
1003                 goto fail_parent;
1004         }
1005
1006         log_debug("About to execute: %s", line);
1007         free(line);
1008
1009         r = cgroup_bonding_realize_list(cgroup_bondings);
1010         if (r < 0)
1011                 goto fail_parent;
1012
1013         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1014
1015         if ((pid = fork()) < 0) {
1016                 r = -errno;
1017                 goto fail_parent;
1018         }
1019
1020         if (pid == 0) {
                /* From here on 'r' holds the exit status to use on failure
                 * and 'err' the negative errno-style reason that is logged
                 * at fail_child. */
1021                 int i, err;
1022                 sigset_t ss;
1023                 const char *username = NULL, *home = NULL;
1024                 uid_t uid = (uid_t) -1;
1025                 gid_t gid = (gid_t) -1;
1026                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1027                 unsigned n_env = 0;
1028                 int saved_stdout = -1, saved_stdin = -1;
1029                 bool keep_stdout = false, keep_stdin = false, set_access = false;
1030
1031                 /* child */
1032
1033                 rename_process_from_path(command->path);
1034
1035                 /* We reset exactly these signals, since they are the
1036                  * only ones we set to SIG_IGN in the main daemon. All
1037                  * others we leave untouched because we set them to
1038                  * SIG_DFL or a valid handler initially, both of which
1039                  * will be demoted to SIG_DFL. */
1040                 default_signals(SIGNALS_CRASH_HANDLER,
1041                                 SIGNALS_IGNORE, -1);
1042
1043                 if (context->ignore_sigpipe)
1044                         ignore_signals(SIGPIPE, -1);
1045
1046                 assert_se(sigemptyset(&ss) == 0);
1047                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1048                         err = -errno;
1049                         r = EXIT_SIGNAL_MASK;
1050                         goto fail_child;
1051                 }
1052
1053                 /* Close sockets very early to make sure we don't
1054                  * block init reexecution because it cannot bind its
1055                  * sockets */
1056                 log_forget_fds();
1057                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1058                                            socket_fd >= 0 ? 1 : n_fds);
1059                 if (err < 0) {
1060                         r = EXIT_FDS;
1061                         goto fail_child;
1062                 }
1063
1064                 if (!context->same_pgrp)
1065                         if (setsid() < 0) {
1066                                 err = -errno;
1067                                 r = EXIT_SETSID;
1068                                 goto fail_child;
1069                         }
1070
1071                 if (context->tcpwrap_name) {
1072                         if (socket_fd >= 0)
1073                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1074                                         err = -EACCES;
1075                                         r = EXIT_TCPWRAP;
1076                                         goto fail_child;
1077                                 }
1078
1079                         for (i = 0; i < (int) n_fds; i++) {
1080                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1081                                         err = -EACCES;
1082                                         r = EXIT_TCPWRAP;
1083                                         goto fail_child;
1084                                 }
1085                         }
1086                 }
1087
1088                 exec_context_tty_reset(context);
1089
1090                 /* We skip the confirmation step if we shall not apply the TTY */
1091                 if (confirm_spawn &&
1092                     (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1093                         char response;
1094
1095                         /* Set up terminal for the question */
                        /* A non-zero result is used as the exit status as is */
1096                         if ((r = setup_confirm_stdio(context,
1097                                                      &saved_stdin, &saved_stdout))) {
1098                                 err = -errno;
1099                                 goto fail_child;
1100                         }
1101
1102                         /* Now ask the question. */
1103                         if (!(line = exec_command_line(argv))) {
1104                                 err = -ENOMEM;
1105                                 r = EXIT_MEMORY;
1106                                 goto fail_child;
1107                         }
1108
1109                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1110                         free(line);
1111
1112                         if (r < 0 || response == 'n') {
1113                                 err = -ECANCELED;
1114                                 r = EXIT_CONFIRM;
1115                                 goto fail_child;
1116                         } else if (response == 's') {
                                /* Skip: with r == 0 no warning is logged at
                                 * fail_child and the child exits with
                                 * status 0 without executing anything. */
1117                                 err = r = 0;
1118                                 goto fail_child;
1119                         }
1120
1121                         /* Release terminal for the question */
1122                         if ((r = restore_confirm_stdio(context,
1123                                                        &saved_stdin, &saved_stdout,
1124                                                        &keep_stdin, &keep_stdout))) {
1125                                 err = -errno;
1126                                 goto fail_child;
1127                         }
1128                 }
1129
1130                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1131                  * must be sure to drop O_NONBLOCK */
1132                 if (socket_fd >= 0)
1133                         fd_nonblock(socket_fd, false);
1134
1135                 if (!keep_stdin) {
1136                         err = setup_input(context, socket_fd, apply_tty_stdin);
1137                         if (err < 0) {
1138                                 r = EXIT_STDIN;
1139                                 goto fail_child;
1140                         }
1141                 }
1142
1143                 if (!keep_stdout) {
1144                         err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1145                         if (err < 0) {
1146                                 r = EXIT_STDOUT;
1147                                 goto fail_child;
1148                         }
1149                 }
1150
1151                 err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1152                 if (err < 0) {
1153                         r = EXIT_STDERR;
1154                         goto fail_child;
1155                 }
1156
1157                 if (cgroup_bondings) {
1158                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1159                         if (err < 0) {
1160                                 r = EXIT_CGROUP;
1161                                 goto fail_child;
1162                         }
1163                 }
1164
1165                 if (context->oom_score_adjust_set) {
1166                         char t[16];
1167
1168                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1169                         char_array_0(t);
1170
1171                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1172                                 /* Compatibility with Linux <= 2.6.35 */
1173
1174                                 int adj;
1175
                                /* Scale the value to the range of the older
                                 * oom_adj interface */
1176                                 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1177                                 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1178
1179                                 snprintf(t, sizeof(t), "%i", adj);
1180                                 char_array_0(t);
1181
                                /* NOTE(review): the value returned by
                                 * write_one_line_file() is only tested for
                                 * < 0; the actual reason is taken from
                                 * errno. Confirm that the helper leaves
                                 * errno set on failure. */
1182                                 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1183                                     && errno != EACCES) {
1184                                         err = -errno;
1185                                         r = EXIT_OOM_ADJUST;
1186                                         goto fail_child;
1187                                 }
1188                         }
1189                 }
1190
1191                 if (context->nice_set)
1192                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1193                                 err = -errno;
1194                                 r = EXIT_NICE;
1195                                 goto fail_child;
1196                         }
1197
1198                 if (context->cpu_sched_set) {
1199                         struct sched_param param;
1200
1201                         zero(param);
1202                         param.sched_priority = context->cpu_sched_priority;
1203
1204                         if (sched_setscheduler(0, context->cpu_sched_policy |
1205                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1206                                 err = -errno;
1207                                 r = EXIT_SETSCHEDULER;
1208                                 goto fail_child;
1209                         }
1210                 }
1211
1212                 if (context->cpuset)
1213                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1214                                 err = -errno;
1215                                 r = EXIT_CPUAFFINITY;
1216                                 goto fail_child;
1217                         }
1218
1219                 if (context->ioprio_set)
1220                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1221                                 err = -errno;
1222                                 r = EXIT_IOPRIO;
1223                                 goto fail_child;
1224                         }
1225
1226                 if (context->timer_slack_nsec_set)
1227                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1228                                 err = -errno;
1229                                 r = EXIT_TIMERSLACK;
1230                                 goto fail_child;
1231                         }
1232
1233                 if (context->utmp_id)
1234                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1235
1236                 if (context->user) {
1237                         username = context->user;
1238                         err = get_user_creds(&username, &uid, &gid, &home);
1239                         if (err < 0) {
1240                                 r = EXIT_USER;
1241                                 goto fail_child;
1242                         }
1243
1244                         if (is_terminal_input(context->std_input)) {
1245                                 err = chown_terminal(STDIN_FILENO, uid);
1246                                 if (err < 0) {
1247                                         r = EXIT_STDIN;
1248                                         goto fail_child;
1249                                 }
1250                         }
1251
1252                         if (cgroup_bondings && context->control_group_modify) {
1253                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1254                                 if (err >= 0)
1255                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1256                                 if (err < 0) {
1257                                         r = EXIT_CGROUP;
1258                                         goto fail_child;
1259                                 }
1260
1261                                 set_access = true;
1262                         }
1263                 }
1264
                /* Only reached with set_access == false, i.e. when the
                 * task access call above was not made.
                 * NOTE(review): the second (uid_t) -1 sits in the argument
                 * position where the call above passes 'gid'. */
1265                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1266                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1267                         if (err < 0) {
1268                                 r = EXIT_CGROUP;
1269                                 goto fail_child;
1270                         }
1271                 }
1272
1273                 if (apply_permissions) {
1274                         err = enforce_groups(context, username, gid);
1275                         if (err < 0) {
1276                                 r = EXIT_GROUP;
1277                                 goto fail_child;
1278                         }
1279                 }
1280
1281                 umask(context->umask);
1282
1283 #ifdef HAVE_PAM
1284                 if (context->pam_name && username) {
1285                         err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1286                         if (err < 0) {
1287                                 r = EXIT_PAM;
1288                                 goto fail_child;
1289                         }
1290                 }
1291 #endif
1292                 if (context->private_network) {
1293                         if (unshare(CLONE_NEWNET) < 0) {
1294                                 err = -errno;
1295                                 r = EXIT_NETWORK;
1296                                 goto fail_child;
1297                         }
1298
1299                         loopback_setup();
1300                 }
1301
1302                 if (strv_length(context->read_write_dirs) > 0 ||
1303                     strv_length(context->read_only_dirs) > 0 ||
1304                     strv_length(context->inaccessible_dirs) > 0 ||
1305                     context->mount_flags != MS_SHARED ||
1306                     context->private_tmp) {
1307                         err = setup_namespace(context->read_write_dirs,
1308                                               context->read_only_dirs,
1309                                               context->inaccessible_dirs,
1310                                               context->private_tmp,
1311                                               context->mount_flags);
1312                         if (err < 0) {
1313                                 r = EXIT_NAMESPACE;
1314                                 goto fail_child;
1315                         }
1316                 }
1317
1318                 if (apply_chroot) {
1319                         if (context->root_directory)
1320                                 if (chroot(context->root_directory) < 0) {
1321                                         err = -errno;
1322                                         r = EXIT_CHROOT;
1323                                         goto fail_child;
1324                                 }
1325
1326                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1327                                 err = -errno;
1328                                 r = EXIT_CHDIR;
1329                                 goto fail_child;
1330                         }
1331                 } else {
1332
                        /* No chroot() here: just change into the working
                         * directory as seen below the root directory */
1333                         char *d;
1334
1335                         if (asprintf(&d, "%s/%s",
1336                                      context->root_directory ? context->root_directory : "",
1337                                      context->working_directory ? context->working_directory : "") < 0) {
1338                                 err = -ENOMEM;
1339                                 r = EXIT_MEMORY;
1340                                 goto fail_child;
1341                         }
1342
1343                         if (chdir(d) < 0) {
1344                                 err = -errno;
1345                                 free(d);
1346                                 r = EXIT_CHDIR;
1347                                 goto fail_child;
1348                         }
1349
1350                         free(d);
1351                 }
1352
1353                 /* We repeat the fd closing here, to make sure that
1354                  * nothing is leaked from the PAM modules */
1355                 err = close_all_fds(fds, n_fds);
1356                 if (err >= 0)
1357                         err = shift_fds(fds, n_fds);
1358                 if (err >= 0)
1359                         err = flags_fds(fds, n_fds, context->non_blocking);
1360                 if (err < 0) {
1361                         r = EXIT_FDS;
1362                         goto fail_child;
1363                 }
1364
1365                 if (apply_permissions) {
1366
1367                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1368                                 if (!context->rlimit[i])
1369                                         continue;
1370
1371                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1372                                         err = -errno;
1373                                         r = EXIT_LIMITS;
1374                                         goto fail_child;
1375                                 }
1376                         }
1377
1378                         if (context->capability_bounding_set_drop) {
1379                                 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1380                                 if (err < 0) {
1381                                         r = EXIT_CAPABILITIES;
1382                                         goto fail_child;
1383                                 }
1384                         }
1385
1386                         if (context->user) {
1387                                 err = enforce_user(context, uid);
1388                                 if (err < 0) {
1389                                         r = EXIT_USER;
1390                                         goto fail_child;
1391                                 }
1392                         }
1393
1394                         /* PR_GET_SECUREBITS is not privileged, while
1395                          * PR_SET_SECUREBITS is. So to suppress
1396                          * potential EPERMs we'll try not to call
1397                          * PR_SET_SECUREBITS unless necessary. */
1398                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1399                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1400                                         err = -errno;
1401                                         r = EXIT_SECUREBITS;
1402                                         goto fail_child;
1403                                 }
1404
1405                         if (context->capabilities)
1406                                 if (cap_set_proc(context->capabilities) < 0) {
1407                                         err = -errno;
1408                                         r = EXIT_CAPABILITIES;
1409                                         goto fail_child;
1410                                 }
1411                 }
1412
                /* At most six entries are generated below (LISTEN_PID,
                 * LISTEN_FDS, HOME, LOGNAME, USER and the terminal entry);
                 * the seventh slot is presumably the terminating NULL,
                 * assuming new0() zero-fills the array. */
1413                 if (!(our_env = new0(char*, 7))) {
1414                         err = -ENOMEM;
1415                         r = EXIT_MEMORY;
1416                         goto fail_child;
1417                 }
1418
1419                 if (n_fds > 0)
1420                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1421                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1422                                 err = -ENOMEM;
1423                                 r = EXIT_MEMORY;
1424                                 goto fail_child;
1425                         }
1426
1427                 if (home)
1428                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1429                                 err = -ENOMEM;
1430                                 r = EXIT_MEMORY;
1431                                 goto fail_child;
1432                         }
1433
1434                 if (username)
1435                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1436                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1437                                 err = -ENOMEM;
1438                                 r = EXIT_MEMORY;
1439                                 goto fail_child;
1440                         }
1441
1442                 if (is_terminal_input(context->std_input) ||
1443                     context->std_output == EXEC_OUTPUT_TTY ||
1444                     context->std_error == EXEC_OUTPUT_TTY)
1445                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1446                                 err = -ENOMEM;
1447                                 r = EXIT_MEMORY;
1448                                 goto fail_child;
1449                         }
1450
1451                 assert(n_env <= 7);
1452
1453                 if (!(final_env = strv_env_merge(
1454                                       5,
1455                                       environment,
1456                                       our_env,
1457                                       context->environment,
1458                                       files_env,
1459                                       pam_env,
1460                                       NULL))) {
1461                         err = -ENOMEM;
1462                         r = EXIT_MEMORY;
1463                         goto fail_child;
1464                 }
1465
1466                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1467                         err = -ENOMEM;
1468                         r = EXIT_MEMORY;
1469                         goto fail_child;
1470                 }
1471
1472                 final_env = strv_env_clean(final_env);
1473
                /* execve() only returns on failure, so reaching the next
                 * statement means the command could not be executed */
1474                 execve(command->path, final_argv, final_env);
1475                 err = -errno;
1476                 r = EXIT_EXEC;
1477
1478         fail_child:
1479                 if (r != 0) {
1480                         log_open();
1481                         log_warning("Failed at step %s spawning %s: %s",
1482                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1483                                     command->path, strerror(-err));
1484                 }
1485
1486                 strv_free(our_env);
1487                 strv_free(final_env);
1488                 strv_free(pam_env);
1489                 strv_free(files_env);
1490                 strv_free(final_argv);
1491
1492                 if (saved_stdin >= 0)
1493                         close_nointr_nofail(saved_stdin);
1494
1495                 if (saved_stdout >= 0)
1496                         close_nointr_nofail(saved_stdout);
1497
1498                 _exit(r);
1499         }
1500
        /* Only the parent gets here; the child always ends in execve() or
         * _exit() above */
1501         strv_free(files_env);
1502
1503         /* We add the new process to the cgroup both in the child (so
1504          * that we can be sure that no user code is ever executed
1505          * outside of the cgroup) and in the parent (so that we can be
1506          * sure that when we kill the cgroup the process will be
1507          * killed too). */
1508         if (cgroup_bondings)
1509                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1510
1511         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1512
1513         exec_status_start(&command->exec_status, pid);
1514
1515         *ret = pid;
1516         return 0;
1517
1518 fail_parent:
1519         strv_free(files_env);
1520
1521         return r;
1522 }
1523
1524 void exec_context_init(ExecContext *c) {
1525         assert(c);
1526
1527         c->umask = 0022;
1528         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1529         c->cpu_sched_policy = SCHED_OTHER;
1530         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1531         c->syslog_level_prefix = true;
1532         c->mount_flags = MS_SHARED;
1533         c->kill_signal = SIGTERM;
1534         c->send_sigkill = true;
1535         c->control_group_persistent = -1;
1536         c->ignore_sigpipe = true;
1537 }
1538
1539 void exec_context_done(ExecContext *c) {
1540         unsigned l;
1541
1542         assert(c);
1543
1544         strv_free(c->environment);
1545         c->environment = NULL;
1546
1547         strv_free(c->environment_files);
1548         c->environment_files = NULL;
1549
1550         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1551                 free(c->rlimit[l]);
1552                 c->rlimit[l] = NULL;
1553         }
1554
1555         free(c->working_directory);
1556         c->working_directory = NULL;
1557         free(c->root_directory);
1558         c->root_directory = NULL;
1559
1560         free(c->tty_path);
1561         c->tty_path = NULL;
1562
1563         free(c->tcpwrap_name);
1564         c->tcpwrap_name = NULL;
1565
1566         free(c->syslog_identifier);
1567         c->syslog_identifier = NULL;
1568
1569         free(c->user);
1570         c->user = NULL;
1571
1572         free(c->group);
1573         c->group = NULL;
1574
1575         strv_free(c->supplementary_groups);
1576         c->supplementary_groups = NULL;
1577
1578         free(c->pam_name);
1579         c->pam_name = NULL;
1580
1581         if (c->capabilities) {
1582                 cap_free(c->capabilities);
1583                 c->capabilities = NULL;
1584         }
1585
1586         strv_free(c->read_only_dirs);
1587         c->read_only_dirs = NULL;
1588
1589         strv_free(c->read_write_dirs);
1590         c->read_write_dirs = NULL;
1591
1592         strv_free(c->inaccessible_dirs);
1593         c->inaccessible_dirs = NULL;
1594
1595         if (c->cpuset)
1596                 CPU_FREE(c->cpuset);
1597
1598         free(c->utmp_id);
1599         c->utmp_id = NULL;
1600 }
1601
1602 void exec_command_done(ExecCommand *c) {
1603         assert(c);
1604
1605         free(c->path);
1606         c->path = NULL;
1607
1608         strv_free(c->argv);
1609         c->argv = NULL;
1610 }
1611
1612 void exec_command_done_array(ExecCommand *c, unsigned n) {
1613         unsigned i;
1614
1615         for (i = 0; i < n; i++)
1616                 exec_command_done(c+i);
1617 }
1618
1619 void exec_command_free_list(ExecCommand *c) {
1620         ExecCommand *i;
1621
1622         while ((i = c)) {
1623                 LIST_REMOVE(ExecCommand, command, c, i);
1624                 exec_command_done(i);
1625                 free(i);
1626         }
1627 }
1628
1629 void exec_command_free_array(ExecCommand **c, unsigned n) {
1630         unsigned i;
1631
1632         for (i = 0; i < n; i++) {
1633                 exec_command_free_list(c[i]);
1634                 c[i] = NULL;
1635         }
1636 }
1637
1638 int exec_context_load_environment(const ExecContext *c, char ***l) {
1639         char **i, **r = NULL;
1640
1641         assert(c);
1642         assert(l);
1643
1644         STRV_FOREACH(i, c->environment_files) {
1645                 char *fn;
1646                 int k;
1647                 bool ignore = false;
1648                 char **p;
1649
1650                 fn = *i;
1651
1652                 if (fn[0] == '-') {
1653                         ignore = true;
1654                         fn ++;
1655                 }
1656
1657                 if (!path_is_absolute(fn)) {
1658
1659                         if (ignore)
1660                                 continue;
1661
1662                         strv_free(r);
1663                         return -EINVAL;
1664                 }
1665
1666                 if ((k = load_env_file(fn, &p)) < 0) {
1667
1668                         if (ignore)
1669                                 continue;
1670
1671                         strv_free(r);
1672                         return k;
1673                 }
1674
1675                 if (r == NULL)
1676                         r = p;
1677                 else {
1678                         char **m;
1679
1680                         m = strv_env_merge(2, r, p);
1681                         strv_free(r);
1682                         strv_free(p);
1683
1684                         if (!m)
1685                                 return -ENOMEM;
1686
1687                         r = m;
1688                 }
1689         }
1690
1691         *l = r;
1692
1693         return 0;
1694 }
1695
1696 static void strv_fprintf(FILE *f, char **l) {
1697         char **g;
1698
1699         assert(f);
1700
1701         STRV_FOREACH(g, l)
1702                 fprintf(f, " %s", *g);
1703 }
1704
1705 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1706         char ** e;
1707         unsigned i;
1708
1709         assert(c);
1710         assert(f);
1711
1712         if (!prefix)
1713                 prefix = "";
1714
1715         fprintf(f,
1716                 "%sUMask: %04o\n"
1717                 "%sWorkingDirectory: %s\n"
1718                 "%sRootDirectory: %s\n"
1719                 "%sNonBlocking: %s\n"
1720                 "%sPrivateTmp: %s\n"
1721                 "%sControlGroupModify: %s\n"
1722                 "%sControlGroupPersistent: %s\n"
1723                 "%sPrivateNetwork: %s\n",
1724                 prefix, c->umask,
1725                 prefix, c->working_directory ? c->working_directory : "/",
1726                 prefix, c->root_directory ? c->root_directory : "/",
1727                 prefix, yes_no(c->non_blocking),
1728                 prefix, yes_no(c->private_tmp),
1729                 prefix, yes_no(c->control_group_modify),
1730                 prefix, yes_no(c->control_group_persistent),
1731                 prefix, yes_no(c->private_network));
1732
1733         STRV_FOREACH(e, c->environment)
1734                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1735
1736         STRV_FOREACH(e, c->environment_files)
1737                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1738
1739         if (c->tcpwrap_name)
1740                 fprintf(f,
1741                         "%sTCPWrapName: %s\n",
1742                         prefix, c->tcpwrap_name);
1743
1744         if (c->nice_set)
1745                 fprintf(f,
1746                         "%sNice: %i\n",
1747                         prefix, c->nice);
1748
1749         if (c->oom_score_adjust_set)
1750                 fprintf(f,
1751                         "%sOOMScoreAdjust: %i\n",
1752                         prefix, c->oom_score_adjust);
1753
1754         for (i = 0; i < RLIM_NLIMITS; i++)
1755                 if (c->rlimit[i])
1756                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1757
1758         if (c->ioprio_set)
1759                 fprintf(f,
1760                         "%sIOSchedulingClass: %s\n"
1761                         "%sIOPriority: %i\n",
1762                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1763                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1764
1765         if (c->cpu_sched_set)
1766                 fprintf(f,
1767                         "%sCPUSchedulingPolicy: %s\n"
1768                         "%sCPUSchedulingPriority: %i\n"
1769                         "%sCPUSchedulingResetOnFork: %s\n",
1770                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1771                         prefix, c->cpu_sched_priority,
1772                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1773
1774         if (c->cpuset) {
1775                 fprintf(f, "%sCPUAffinity:", prefix);
1776                 for (i = 0; i < c->cpuset_ncpus; i++)
1777                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1778                                 fprintf(f, " %i", i);
1779                 fputs("\n", f);
1780         }
1781
1782         if (c->timer_slack_nsec_set)
1783                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1784
1785         fprintf(f,
1786                 "%sStandardInput: %s\n"
1787                 "%sStandardOutput: %s\n"
1788                 "%sStandardError: %s\n",
1789                 prefix, exec_input_to_string(c->std_input),
1790                 prefix, exec_output_to_string(c->std_output),
1791                 prefix, exec_output_to_string(c->std_error));
1792
1793         if (c->tty_path)
1794                 fprintf(f,
1795                         "%sTTYPath: %s\n"
1796                         "%sTTYReset: %s\n"
1797                         "%sTTYVHangup: %s\n"
1798                         "%sTTYVTDisallocate: %s\n",
1799                         prefix, c->tty_path,
1800                         prefix, yes_no(c->tty_reset),
1801                         prefix, yes_no(c->tty_vhangup),
1802                         prefix, yes_no(c->tty_vt_disallocate));
1803
1804         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1805             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1806             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1807             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1808                 fprintf(f,
1809                         "%sSyslogFacility: %s\n"
1810                         "%sSyslogLevel: %s\n",
1811                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1812                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1813
1814         if (c->capabilities) {
1815                 char *t;
1816                 if ((t = cap_to_text(c->capabilities, NULL))) {
1817                         fprintf(f, "%sCapabilities: %s\n",
1818                                 prefix, t);
1819                         cap_free(t);
1820                 }
1821         }
1822
1823         if (c->secure_bits)
1824                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1825                         prefix,
1826                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1827                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1828                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1829                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1830                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1831                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1832
1833         if (c->capability_bounding_set_drop) {
1834                 unsigned long l;
1835                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1836
1837                 for (l = 0; l <= cap_last_cap(); l++)
1838                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1839                                 char *t;
1840
1841                                 if ((t = cap_to_name(l))) {
1842                                         fprintf(f, " %s", t);
1843                                         cap_free(t);
1844                                 }
1845                         }
1846
1847                 fputs("\n", f);
1848         }
1849
1850         if (c->user)
1851                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1852         if (c->group)
1853                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1854
1855         if (strv_length(c->supplementary_groups) > 0) {
1856                 fprintf(f, "%sSupplementaryGroups:", prefix);
1857                 strv_fprintf(f, c->supplementary_groups);
1858                 fputs("\n", f);
1859         }
1860
1861         if (c->pam_name)
1862                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1863
1864         if (strv_length(c->read_write_dirs) > 0) {
1865                 fprintf(f, "%sReadWriteDirs:", prefix);
1866                 strv_fprintf(f, c->read_write_dirs);
1867                 fputs("\n", f);
1868         }
1869
1870         if (strv_length(c->read_only_dirs) > 0) {
1871                 fprintf(f, "%sReadOnlyDirs:", prefix);
1872                 strv_fprintf(f, c->read_only_dirs);
1873                 fputs("\n", f);
1874         }
1875
1876         if (strv_length(c->inaccessible_dirs) > 0) {
1877                 fprintf(f, "%sInaccessibleDirs:", prefix);
1878                 strv_fprintf(f, c->inaccessible_dirs);
1879                 fputs("\n", f);
1880         }
1881
1882         fprintf(f,
1883                 "%sKillMode: %s\n"
1884                 "%sKillSignal: SIG%s\n"
1885                 "%sSendSIGKILL: %s\n"
1886                 "%sIgnoreSIGPIPE: %s\n",
1887                 prefix, kill_mode_to_string(c->kill_mode),
1888                 prefix, signal_to_string(c->kill_signal),
1889                 prefix, yes_no(c->send_sigkill),
1890                 prefix, yes_no(c->ignore_sigpipe));
1891
1892         if (c->utmp_id)
1893                 fprintf(f,
1894                         "%sUtmpIdentifier: %s\n",
1895                         prefix, c->utmp_id);
1896 }
1897
1898 void exec_status_start(ExecStatus *s, pid_t pid) {
1899         assert(s);
1900
1901         zero(*s);
1902         s->pid = pid;
1903         dual_timestamp_get(&s->start_timestamp);
1904 }
1905
1906 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1907         assert(s);
1908
1909         if (s->pid && s->pid != pid)
1910                 zero(*s);
1911
1912         s->pid = pid;
1913         dual_timestamp_get(&s->exit_timestamp);
1914
1915         s->code = code;
1916         s->status = status;
1917
1918         if (context) {
1919                 if (context->utmp_id)
1920                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1921
1922                 exec_context_tty_reset(context);
1923         }
1924 }
1925
1926 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1927         char buf[FORMAT_TIMESTAMP_MAX];
1928
1929         assert(s);
1930         assert(f);
1931
1932         if (!prefix)
1933                 prefix = "";
1934
1935         if (s->pid <= 0)
1936                 return;
1937
1938         fprintf(f,
1939                 "%sPID: %lu\n",
1940                 prefix, (unsigned long) s->pid);
1941
1942         if (s->start_timestamp.realtime > 0)
1943                 fprintf(f,
1944                         "%sStart Timestamp: %s\n",
1945                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1946
1947         if (s->exit_timestamp.realtime > 0)
1948                 fprintf(f,
1949                         "%sExit Timestamp: %s\n"
1950                         "%sExit Code: %s\n"
1951                         "%sExit Status: %i\n",
1952                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1953                         prefix, sigchld_code_to_string(s->code),
1954                         prefix, s->status);
1955 }
1956
1957 char *exec_command_line(char **argv) {
1958         size_t k;
1959         char *n, *p, **a;
1960         bool first = true;
1961
1962         assert(argv);
1963
1964         k = 1;
1965         STRV_FOREACH(a, argv)
1966                 k += strlen(*a)+3;
1967
1968         if (!(n = new(char, k)))
1969                 return NULL;
1970
1971         p = n;
1972         STRV_FOREACH(a, argv) {
1973
1974                 if (!first)
1975                         *(p++) = ' ';
1976                 else
1977                         first = false;
1978
1979                 if (strpbrk(*a, WHITESPACE)) {
1980                         *(p++) = '\'';
1981                         p = stpcpy(p, *a);
1982                         *(p++) = '\'';
1983                 } else
1984                         p = stpcpy(p, *a);
1985
1986         }
1987
1988         *p = 0;
1989
1990         /* FIXME: this doesn't really handle arguments that have
1991          * spaces and ticks in them */
1992
1993         return n;
1994 }
1995
1996 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1997         char *p2;
1998         const char *prefix2;
1999
2000         char *cmd;
2001
2002         assert(c);
2003         assert(f);
2004
2005         if (!prefix)
2006                 prefix = "";
2007         p2 = strappend(prefix, "\t");
2008         prefix2 = p2 ? p2 : prefix;
2009
2010         cmd = exec_command_line(c->argv);
2011
2012         fprintf(f,
2013                 "%sCommand Line: %s\n",
2014                 prefix, cmd ? cmd : strerror(ENOMEM));
2015
2016         free(cmd);
2017
2018         exec_status_dump(&c->exec_status, f, prefix2);
2019
2020         free(p2);
2021 }
2022
2023 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2024         assert(f);
2025
2026         if (!prefix)
2027                 prefix = "";
2028
2029         LIST_FOREACH(command, c, c)
2030                 exec_command_dump(c, f, prefix);
2031 }
2032
2033 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2034         ExecCommand *end;
2035
2036         assert(l);
2037         assert(e);
2038
2039         if (*l) {
2040                 /* It's kind of important, that we keep the order here */
2041                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2042                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2043         } else
2044               *l = e;
2045 }
2046
2047 int exec_command_set(ExecCommand *c, const char *path, ...) {
2048         va_list ap;
2049         char **l, *p;
2050
2051         assert(c);
2052         assert(path);
2053
2054         va_start(ap, path);
2055         l = strv_new_ap(path, ap);
2056         va_end(ap);
2057
2058         if (!l)
2059                 return -ENOMEM;
2060
2061         if (!(p = strdup(path))) {
2062                 strv_free(l);
2063                 return -ENOMEM;
2064         }
2065
2066         free(c->path);
2067         c->path = p;
2068
2069         strv_free(c->argv);
2070         c->argv = l;
2071
2072         return 0;
2073 }
2074
2075 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2076         [EXEC_INPUT_NULL] = "null",
2077         [EXEC_INPUT_TTY] = "tty",
2078         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2079         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2080         [EXEC_INPUT_SOCKET] = "socket"
2081 };
2082
2083 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2084
2085 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2086         [EXEC_OUTPUT_INHERIT] = "inherit",
2087         [EXEC_OUTPUT_NULL] = "null",
2088         [EXEC_OUTPUT_TTY] = "tty",
2089         [EXEC_OUTPUT_SYSLOG] = "syslog",
2090         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2091         [EXEC_OUTPUT_KMSG] = "kmsg",
2092         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2093         [EXEC_OUTPUT_JOURNAL] = "journal",
2094         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2095         [EXEC_OUTPUT_SOCKET] = "socket"
2096 };
2097
2098 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2099
2100 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2101         [KILL_CONTROL_GROUP] = "control-group",
2102         [KILL_PROCESS] = "process",
2103         [KILL_NONE] = "none"
2104 };
2105
2106 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2107
2108 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2109         [KILL_MAIN] = "main",
2110         [KILL_CONTROL] = "control",
2111         [KILL_ALL] = "all"
2112 };
2113
2114 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);