chiark / gitweb /
4d4091940f9e13018940d2beaf1bcb59116563f6
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41
42 #ifdef HAVE_PAM
43 #include <security/pam_appl.h>
44 #endif
45
46 #include "execute.h"
47 #include "strv.h"
48 #include "macro.h"
49 #include "capability.h"
50 #include "util.h"
51 #include "log.h"
52 #include "ioprio.h"
53 #include "securebits.h"
54 #include "cgroup.h"
55 #include "namespace.h"
56 #include "tcpwrap.h"
57 #include "exit-status.h"
58 #include "missing.h"
59 #include "utmp-wtmp.h"
60 #include "def.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
63
64 /* This assumes there is a 'tty' group */
65 #define TTY_MODE 0620
66
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. directly after stdin/stdout/stderr.
 *
 * The array is modified in place: every entry that had to be moved is
 * replaced by its new descriptor number and the old one is closed.
 *
 * Returns 0 on success, or a negative errno value if duplicating a
 * descriptor failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                /* Index of the first slot that could not be placed at
                 * its target number during this pass, if any */
                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3;
                        int moved;

                        /* Nothing to do if the fd already sits where
                         * it belongs */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd that is
                         * >= wanted, which is not necessarily wanted
                         * itself */
                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = moved;

                        /* The target number was still occupied, so
                         * remember the first such slot and make
                         * another pass starting there */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
110
/* Adjusts the flags of all passed file descriptors: O_NONBLOCK is set
 * or dropped according to 'nonblock', and FD_CLOEXEC is always
 * dropped.
 *
 * Returns 0 on success, or the negative error of the first helper call
 * that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is removed unconditionally: these fds are
                 * meant to be passed on to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
137
138 static const char *tty_path(const ExecContext *context) {
139         assert(context);
140
141         if (context->tty_path)
142                 return context->tty_path;
143
144         return "/dev/console";
145 }
146
147 void exec_context_tty_reset(const ExecContext *context) {
148         assert(context);
149
150         if (context->tty_vhangup)
151                 terminal_vhangup(tty_path(context));
152
153         if (context->tty_reset)
154                 reset_terminal(tty_path(context));
155
156         if (context->tty_vt_disallocate && context->tty_path)
157                 vt_disallocate(context->tty_path);
158 }
159
/* Opens /dev/null with the given access flags and installs it as
 * descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We happened to get the right number straight away */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return ret;
}
176
177 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
178         int fd, r;
179         union sockaddr_union sa;
180
181         assert(context);
182         assert(output < _EXEC_OUTPUT_MAX);
183         assert(ident);
184         assert(nfd >= 0);
185
186         fd = socket(AF_UNIX, SOCK_STREAM, 0);
187         if (fd < 0)
188                 return -errno;
189
190         zero(sa);
191         sa.un.sun_family = AF_UNIX;
192         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
193
194         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
195         if (r < 0) {
196                 close_nointr_nofail(fd);
197                 return -errno;
198         }
199
200         if (shutdown(fd, SHUT_RD) < 0) {
201                 close_nointr_nofail(fd);
202                 return -errno;
203         }
204
205         dprintf(fd,
206                 "%s\n"
207                 "%i\n"
208                 "%i\n"
209                 "%i\n"
210                 "%i\n"
211                 "%i\n",
212                 context->syslog_identifier ? context->syslog_identifier : ident,
213                 context->syslog_priority,
214                 !!context->syslog_level_prefix,
215                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
216                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
217                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
218
219         if (fd != nfd) {
220                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
221                 close_nointr_nofail(fd);
222         } else
223                 r = nfd;
224
225         return r;
226 }
/* Opens the terminal at 'path' with the given access mode (without
 * making it the controlling terminal) and installs it as descriptor
 * 'nfd'.
 *
 * Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already at the requested number, nothing to move */
        if (term_fd == nfd)
                return nfd;

        ret = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(term_fd);

        return ret;
}
244
245 static bool is_terminal_input(ExecInput i) {
246         return
247                 i == EXEC_INPUT_TTY ||
248                 i == EXEC_INPUT_TTY_FORCE ||
249                 i == EXEC_INPUT_TTY_FAIL;
250 }
251
252 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
253
254         if (is_terminal_input(std_input) && !apply_tty_stdin)
255                 return EXEC_INPUT_NULL;
256
257         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
258                 return EXEC_INPUT_NULL;
259
260         return std_input;
261 }
262
263 static int fixup_output(ExecOutput std_output, int socket_fd) {
264
265         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
266                 return EXEC_OUTPUT_INHERIT;
267
268         return std_output;
269 }
270
271 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
272         ExecInput i;
273
274         assert(context);
275
276         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
277
278         switch (i) {
279
280         case EXEC_INPUT_NULL:
281                 return open_null_as(O_RDONLY, STDIN_FILENO);
282
283         case EXEC_INPUT_TTY:
284         case EXEC_INPUT_TTY_FORCE:
285         case EXEC_INPUT_TTY_FAIL: {
286                 int fd, r;
287
288                 if ((fd = acquire_terminal(
289                                      tty_path(context),
290                                      i == EXEC_INPUT_TTY_FAIL,
291                                      i == EXEC_INPUT_TTY_FORCE,
292                                      false)) < 0)
293                         return fd;
294
295                 if (fd != STDIN_FILENO) {
296                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
297                         close_nointr_nofail(fd);
298                 } else
299                         r = STDIN_FILENO;
300
301                 return r;
302         }
303
304         case EXEC_INPUT_SOCKET:
305                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306
307         default:
308                 assert_not_reached("Unknown input type");
309         }
310 }
311
312 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
313         ExecOutput o;
314         ExecInput i;
315
316         assert(context);
317         assert(ident);
318
319         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
320         o = fixup_output(context->std_output, socket_fd);
321
322         /* This expects the input is already set up */
323
324         switch (o) {
325
326         case EXEC_OUTPUT_INHERIT:
327
328                 /* If input got downgraded, inherit the original value */
329                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
330                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
331
332                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
333                 if (i != EXEC_INPUT_NULL)
334                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
335
336                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
337                 if (getppid() != 1)
338                         return STDOUT_FILENO;
339
340                 /* We need to open /dev/null here anew, to get the
341                  * right access mode. So we fall through */
342
343         case EXEC_OUTPUT_NULL:
344                 return open_null_as(O_WRONLY, STDOUT_FILENO);
345
346         case EXEC_OUTPUT_TTY:
347                 if (is_terminal_input(i))
348                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
349
350                 /* We don't reset the terminal if this is just about output */
351                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
352
353         case EXEC_OUTPUT_SYSLOG:
354         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
355         case EXEC_OUTPUT_KMSG:
356         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
357         case EXEC_OUTPUT_JOURNAL:
358         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
359                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
360
361         case EXEC_OUTPUT_SOCKET:
362                 assert(socket_fd >= 0);
363                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
364
365         default:
366                 assert_not_reached("Unknown output type");
367         }
368 }
369
370 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
371         ExecOutput o, e;
372         ExecInput i;
373
374         assert(context);
375         assert(ident);
376
377         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
378         o = fixup_output(context->std_output, socket_fd);
379         e = fixup_output(context->std_error, socket_fd);
380
381         /* This expects the input and output are already set up */
382
383         /* Don't change the stderr file descriptor if we inherit all
384          * the way and are not on a tty */
385         if (e == EXEC_OUTPUT_INHERIT &&
386             o == EXEC_OUTPUT_INHERIT &&
387             i == EXEC_INPUT_NULL &&
388             !is_terminal_input(context->std_input) &&
389             getppid () != 1)
390                 return STDERR_FILENO;
391
392         /* Duplicate from stdout if possible */
393         if (e == o || e == EXEC_OUTPUT_INHERIT)
394                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
395
396         switch (e) {
397
398         case EXEC_OUTPUT_NULL:
399                 return open_null_as(O_WRONLY, STDERR_FILENO);
400
401         case EXEC_OUTPUT_TTY:
402                 if (is_terminal_input(i))
403                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
404
405                 /* We don't reset the terminal if this is just about output */
406                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
407
408         case EXEC_OUTPUT_SYSLOG:
409         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
410         case EXEC_OUTPUT_KMSG:
411         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
412         case EXEC_OUTPUT_JOURNAL:
413         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
414                 return connect_logger_as(context, e, ident, STDERR_FILENO);
415
416         case EXEC_OUTPUT_SOCKET:
417                 assert(socket_fd >= 0);
418                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
419
420         default:
421                 assert_not_reached("Unknown error type");
422         }
423 }
424
425 static int chown_terminal(int fd, uid_t uid) {
426         struct stat st;
427
428         assert(fd >= 0);
429
430         /* This might fail. What matters are the results. */
431         (void) fchown(fd, uid, -1);
432         (void) fchmod(fd, TTY_MODE);
433
434         if (fstat(fd, &st) < 0)
435                 return -errno;
436
437         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
438                 return -EPERM;
439
440         return 0;
441 }
442
443 static int setup_confirm_stdio(const ExecContext *context,
444                                int *_saved_stdin,
445                                int *_saved_stdout) {
446         int fd = -1, saved_stdin, saved_stdout = -1, r;
447
448         assert(context);
449         assert(_saved_stdin);
450         assert(_saved_stdout);
451
452         /* This returns positive EXIT_xxx return values instead of
453          * negative errno style values! */
454
455         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
456                 return EXIT_STDIN;
457
458         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
459                 r = EXIT_STDOUT;
460                 goto fail;
461         }
462
463         if ((fd = acquire_terminal(
464                              tty_path(context),
465                              context->std_input == EXEC_INPUT_TTY_FAIL,
466                              context->std_input == EXEC_INPUT_TTY_FORCE,
467                              false)) < 0) {
468                 r = EXIT_STDIN;
469                 goto fail;
470         }
471
472         if (chown_terminal(fd, getuid()) < 0) {
473                 r = EXIT_STDIN;
474                 goto fail;
475         }
476
477         if (dup2(fd, STDIN_FILENO) < 0) {
478                 r = EXIT_STDIN;
479                 goto fail;
480         }
481
482         if (dup2(fd, STDOUT_FILENO) < 0) {
483                 r = EXIT_STDOUT;
484                 goto fail;
485         }
486
487         if (fd >= 2)
488                 close_nointr_nofail(fd);
489
490         *_saved_stdin = saved_stdin;
491         *_saved_stdout = saved_stdout;
492
493         return 0;
494
495 fail:
496         if (saved_stdout >= 0)
497                 close_nointr_nofail(saved_stdout);
498
499         if (saved_stdin >= 0)
500                 close_nointr_nofail(saved_stdin);
501
502         if (fd >= 0)
503                 close_nointr_nofail(fd);
504
505         return r;
506 }
507
508 static int restore_confirm_stdio(const ExecContext *context,
509                                  int *saved_stdin,
510                                  int *saved_stdout,
511                                  bool *keep_stdin,
512                                  bool *keep_stdout) {
513
514         assert(context);
515         assert(saved_stdin);
516         assert(*saved_stdin >= 0);
517         assert(saved_stdout);
518         assert(*saved_stdout >= 0);
519
520         /* This returns positive EXIT_xxx return values instead of
521          * negative errno style values! */
522
523         if (is_terminal_input(context->std_input)) {
524
525                 /* The service wants terminal input. */
526
527                 *keep_stdin = true;
528                 *keep_stdout =
529                         context->std_output == EXEC_OUTPUT_INHERIT ||
530                         context->std_output == EXEC_OUTPUT_TTY;
531
532         } else {
533                 /* If the service doesn't want a controlling terminal,
534                  * then we need to get rid entirely of what we have
535                  * already. */
536
537                 if (release_terminal() < 0)
538                         return EXIT_STDIN;
539
540                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
541                         return EXIT_STDIN;
542
543                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
544                         return EXIT_STDOUT;
545
546                 *keep_stdout = *keep_stdin = false;
547         }
548
549         return 0;
550 }
551
552 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
553         bool keep_groups = false;
554         int r;
555
556         assert(context);
557
558         /* Lookup and set GID and supplementary group list. Here too
559          * we avoid NSS lookups for gid=0. */
560
561         if (context->group || username) {
562
563                 if (context->group) {
564                         const char *g = context->group;
565
566                         if ((r = get_group_creds(&g, &gid)) < 0)
567                                 return r;
568                 }
569
570                 /* First step, initialize groups from /etc/groups */
571                 if (username && gid != 0) {
572                         if (initgroups(username, gid) < 0)
573                                 return -errno;
574
575                         keep_groups = true;
576                 }
577
578                 /* Second step, set our gids */
579                 if (setresgid(gid, gid, gid) < 0)
580                         return -errno;
581         }
582
583         if (context->supplementary_groups) {
584                 int ngroups_max, k;
585                 gid_t *gids;
586                 char **i;
587
588                 /* Final step, initialize any manually set supplementary groups */
589                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
590
591                 if (!(gids = new(gid_t, ngroups_max)))
592                         return -ENOMEM;
593
594                 if (keep_groups) {
595                         if ((k = getgroups(ngroups_max, gids)) < 0) {
596                                 free(gids);
597                                 return -errno;
598                         }
599                 } else
600                         k = 0;
601
602                 STRV_FOREACH(i, context->supplementary_groups) {
603                         const char *g;
604
605                         if (k >= ngroups_max) {
606                                 free(gids);
607                                 return -E2BIG;
608                         }
609
610                         g = *i;
611                         r = get_group_creds(&g, gids+k);
612                         if (r < 0) {
613                                 free(gids);
614                                 return r;
615                         }
616
617                         k++;
618                 }
619
620                 if (setgroups(k, gids) < 0) {
621                         free(gids);
622                         return -errno;
623                 }
624
625                 free(gids);
626         }
627
628         return 0;
629 }
630
631 static int enforce_user(const ExecContext *context, uid_t uid) {
632         int r;
633         assert(context);
634
635         /* Sets (but doesn't lookup) the uid and make sure we keep the
636          * capabilities while doing so. */
637
638         if (context->capabilities) {
639                 cap_t d;
640                 static const cap_value_t bits[] = {
641                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
642                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
643                 };
644
645                 /* First step: If we need to keep capabilities but
646                  * drop privileges we need to make sure we keep our
647                  * caps, whiel we drop privileges. */
648                 if (uid != 0) {
649                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
650
651                         if (prctl(PR_GET_SECUREBITS) != sb)
652                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
653                                         return -errno;
654                 }
655
656                 /* Second step: set the capabilities. This will reduce
657                  * the capabilities to the minimum we need. */
658
659                 if (!(d = cap_dup(context->capabilities)))
660                         return -errno;
661
662                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
663                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
664                         r = -errno;
665                         cap_free(d);
666                         return r;
667                 }
668
669                 if (cap_set_proc(d) < 0) {
670                         r = -errno;
671                         cap_free(d);
672                         return r;
673                 }
674
675                 cap_free(d);
676         }
677
678         /* Third step: actually set the uids */
679         if (setresuid(uid, uid, uid) < 0)
680                 return -errno;
681
682         /* At this point we should have all necessary capabilities but
683            are otherwise a normal user. However, the caps might got
684            corrupted due to the setresuid() so we need clean them up
685            later. This is done outside of this call. */
686
687         return 0;
688 }
689
690 #ifdef HAVE_PAM
691
692 static int null_conv(
693                 int num_msg,
694                 const struct pam_message **msg,
695                 struct pam_response **resp,
696                 void *appdata_ptr) {
697
698         /* We don't support conversations */
699
700         return PAM_CONV_ERR;
701 }
702
703 static int setup_pam(
704                 const char *name,
705                 const char *user,
706                 uid_t uid,
707                 const char *tty,
708                 char ***pam_env,
709                 int fds[], unsigned n_fds) {
710
711         static const struct pam_conv conv = {
712                 .conv = null_conv,
713                 .appdata_ptr = NULL
714         };
715
716         pam_handle_t *handle = NULL;
717         sigset_t ss, old_ss;
718         int pam_code = PAM_SUCCESS;
719         int err;
720         char **e = NULL;
721         bool close_session = false;
722         pid_t pam_pid = 0, parent_pid;
723
724         assert(name);
725         assert(user);
726         assert(pam_env);
727
728         /* We set up PAM in the parent process, then fork. The child
729          * will then stay around until killed via PR_GET_PDEATHSIG or
730          * systemd via the cgroup logic. It will then remove the PAM
731          * session again. The parent process will exec() the actual
732          * daemon. We do things this way to ensure that the main PID
733          * of the daemon is the one we initially fork()ed. */
734
735         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
736                 handle = NULL;
737                 goto fail;
738         }
739
740         if (tty)
741                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
742                         goto fail;
743
744         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
745                 goto fail;
746
747         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
748                 goto fail;
749
750         close_session = true;
751
752         if ((!(e = pam_getenvlist(handle)))) {
753                 pam_code = PAM_BUF_ERR;
754                 goto fail;
755         }
756
757         /* Block SIGTERM, so that we know that it won't get lost in
758          * the child */
759         if (sigemptyset(&ss) < 0 ||
760             sigaddset(&ss, SIGTERM) < 0 ||
761             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
762                 goto fail;
763
764         parent_pid = getpid();
765
766         if ((pam_pid = fork()) < 0)
767                 goto fail;
768
769         if (pam_pid == 0) {
770                 int sig;
771                 int r = EXIT_PAM;
772
773                 /* The child's job is to reset the PAM session on
774                  * termination */
775
776                 /* This string must fit in 10 chars (i.e. the length
777                  * of "/sbin/init"), to look pretty in /bin/ps */
778                 rename_process("(sd-pam)");
779
780                 /* Make sure we don't keep open the passed fds in this
781                 child. We assume that otherwise only those fds are
782                 open here that have been opened by PAM. */
783                 close_many(fds, n_fds);
784
785                 /* Drop privileges - we don't need any to pam_close_session
786                  * and this will make PR_SET_PDEATHSIG work in most cases.
787                  * If this fails, ignore the error - but expect sd-pam threads
788                  * to fail to exit normally */
789                 if (setresuid(uid, uid, uid) < 0)
790                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
791
792                 /* Wait until our parent died. This will only work if
793                  * the above setresuid() succeeds, otherwise the kernel
794                  * will not allow unprivileged parents kill their privileged
795                  * children this way. We rely on the control groups kill logic
796                  * to do the rest for us. */
797                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
798                         goto child_finish;
799
800                 /* Check if our parent process might already have
801                  * died? */
802                 if (getppid() == parent_pid) {
803                         for (;;) {
804                                 if (sigwait(&ss, &sig) < 0) {
805                                         if (errno == EINTR)
806                                                 continue;
807
808                                         goto child_finish;
809                                 }
810
811                                 assert(sig == SIGTERM);
812                                 break;
813                         }
814                 }
815
816                 /* If our parent died we'll end the session */
817                 if (getppid() != parent_pid)
818                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
819                                 goto child_finish;
820
821                 r = 0;
822
823         child_finish:
824                 pam_end(handle, pam_code | PAM_DATA_SILENT);
825                 _exit(r);
826         }
827
828         /* If the child was forked off successfully it will do all the
829          * cleanups, so forget about the handle here. */
830         handle = NULL;
831
832         /* Unblock SIGTERM again in the parent */
833         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
834                 goto fail;
835
836         /* We close the log explicitly here, since the PAM modules
837          * might have opened it, but we don't want this fd around. */
838         closelog();
839
840         *pam_env = e;
841         e = NULL;
842
843         return 0;
844
845 fail:
846         if (pam_code != PAM_SUCCESS)
847                 err = -EPERM;  /* PAM errors do not map to errno */
848         else
849                 err = -errno;
850
851         if (handle) {
852                 if (close_session)
853                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
854
855                 pam_end(handle, pam_code | PAM_DATA_SILENT);
856         }
857
858         strv_free(e);
859
860         closelog();
861
862         if (pam_pid > 1) {
863                 kill(pam_pid, SIGTERM);
864                 kill(pam_pid, SIGCONT);
865         }
866
867         return err;
868 }
869 #endif
870
871 static int do_capability_bounding_set_drop(uint64_t drop) {
872         unsigned long i;
873         cap_t old_cap = NULL, new_cap = NULL;
874         cap_flag_value_t fv;
875         int r;
876
877         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
878          * in the effective set (yes, the kernel drops that when
879          * executing init!), so get it back temporarily so that we can
880          * call PR_CAPBSET_DROP. */
881
882         old_cap = cap_get_proc();
883         if (!old_cap)
884                 return -errno;
885
886         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
887                 r = -errno;
888                 goto finish;
889         }
890
891         if (fv != CAP_SET) {
892                 static const cap_value_t v = CAP_SETPCAP;
893
894                 new_cap = cap_dup(old_cap);
895                 if (!new_cap) {
896                         r = -errno;
897                         goto finish;
898                 }
899
900                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
901                         r = -errno;
902                         goto finish;
903                 }
904
905                 if (cap_set_proc(new_cap) < 0) {
906                         r = -errno;
907                         goto finish;
908                 }
909         }
910
911         for (i = 0; i <= cap_last_cap(); i++)
912                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
913                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
914                                 r = -errno;
915                                 goto finish;
916                         }
917                 }
918
919         r = 0;
920
921 finish:
922         if (new_cap)
923                 cap_free(new_cap);
924
925         if (old_cap) {
926                 cap_set_proc(old_cap);
927                 cap_free(old_cap);
928         }
929
930         return r;
931 }
932
/* Renames the current process to the file name part of 'path', put in
 * parentheses. If the file name is longer than 8 characters only its
 * last 8 are used; if it is empty the name "(...)" is used. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result must fit
         * in 10 chars (i.e. the length of "/sbin/init") to look
         * pretty in /bin/ps */
        char process_name[11];
        const char *base;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the name is usually more
                 * interesting, since the first bit might just be
                 * "systemd-" */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
963
964 int exec_spawn(ExecCommand *command,
965                char **argv,
966                const ExecContext *context,
967                int fds[], unsigned n_fds,
968                char **environment,
969                bool apply_permissions,
970                bool apply_chroot,
971                bool apply_tty_stdin,
972                bool confirm_spawn,
973                CGroupBonding *cgroup_bondings,
974                CGroupAttribute *cgroup_attributes,
975                const char *cgroup_suffix,
976                int idle_pipe[2],
977                pid_t *ret) {
978
979         pid_t pid;
980         int r;
981         char *line;
982         int socket_fd;
983         char **files_env = NULL;
984
985         assert(command);
986         assert(context);
987         assert(ret);
988         assert(fds || n_fds <= 0);
989
990         if (context->std_input == EXEC_INPUT_SOCKET ||
991             context->std_output == EXEC_OUTPUT_SOCKET ||
992             context->std_error == EXEC_OUTPUT_SOCKET) {
993
994                 if (n_fds != 1)
995                         return -EINVAL;
996
997                 socket_fd = fds[0];
998
999                 fds = NULL;
1000                 n_fds = 0;
1001         } else
1002                 socket_fd = -1;
1003
1004         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1005                 log_error("Failed to load environment files: %s", strerror(-r));
1006                 return r;
1007         }
1008
1009         if (!argv)
1010                 argv = command->argv;
1011
1012         if (!(line = exec_command_line(argv))) {
1013                 r = -ENOMEM;
1014                 goto fail_parent;
1015         }
1016
1017         log_debug("About to execute: %s", line);
1018         free(line);
1019
1020         r = cgroup_bonding_realize_list(cgroup_bondings);
1021         if (r < 0)
1022                 goto fail_parent;
1023
1024         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1025
1026         if ((pid = fork()) < 0) {
1027                 r = -errno;
1028                 goto fail_parent;
1029         }
1030
1031         if (pid == 0) {
1032                 int i, err;
1033                 sigset_t ss;
1034                 const char *username = NULL, *home = NULL;
1035                 uid_t uid = (uid_t) -1;
1036                 gid_t gid = (gid_t) -1;
1037                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1038                 unsigned n_env = 0;
1039                 int saved_stdout = -1, saved_stdin = -1;
1040                 bool keep_stdout = false, keep_stdin = false, set_access = false;
1041
1042                 /* child */
1043
1044                 rename_process_from_path(command->path);
1045
1046                 /* We reset exactly these signals, since they are the
1047                  * only ones we set to SIG_IGN in the main daemon. All
1048                  * others we leave untouched because we set them to
1049                  * SIG_DFL or a valid handler initially, both of which
1050                  * will be demoted to SIG_DFL. */
1051                 default_signals(SIGNALS_CRASH_HANDLER,
1052                                 SIGNALS_IGNORE, -1);
1053
1054                 if (context->ignore_sigpipe)
1055                         ignore_signals(SIGPIPE, -1);
1056
1057                 assert_se(sigemptyset(&ss) == 0);
1058                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1059                         err = -errno;
1060                         r = EXIT_SIGNAL_MASK;
1061                         goto fail_child;
1062                 }
1063
1064                 if (idle_pipe) {
1065                         if (idle_pipe[1] >= 0)
1066                                 close_nointr_nofail(idle_pipe[1]);
1067                         if (idle_pipe[0] >= 0) {
1068                                 fd_wait_for_event(idle_pipe[0], POLLHUP, DEFAULT_TIMEOUT_USEC);
1069                                 close_nointr_nofail(idle_pipe[0]);
1070                         }
1071                 }
1072
1073                 /* Close sockets very early to make sure we don't
1074                  * block init reexecution because it cannot bind its
1075                  * sockets */
1076                 log_forget_fds();
1077                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1078                                            socket_fd >= 0 ? 1 : n_fds);
1079                 if (err < 0) {
1080                         r = EXIT_FDS;
1081                         goto fail_child;
1082                 }
1083
1084                 if (!context->same_pgrp)
1085                         if (setsid() < 0) {
1086                                 err = -errno;
1087                                 r = EXIT_SETSID;
1088                                 goto fail_child;
1089                         }
1090
1091                 if (context->tcpwrap_name) {
1092                         if (socket_fd >= 0)
1093                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1094                                         err = -EACCES;
1095                                         r = EXIT_TCPWRAP;
1096                                         goto fail_child;
1097                                 }
1098
1099                         for (i = 0; i < (int) n_fds; i++) {
1100                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1101                                         err = -EACCES;
1102                                         r = EXIT_TCPWRAP;
1103                                         goto fail_child;
1104                                 }
1105                         }
1106                 }
1107
1108                 exec_context_tty_reset(context);
1109
1110                 /* We skip the confirmation step if we shall not apply the TTY */
1111                 if (confirm_spawn &&
1112                     (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1113                         char response;
1114
1115                         /* Set up terminal for the question */
1116                         if ((r = setup_confirm_stdio(context,
1117                                                      &saved_stdin, &saved_stdout))) {
1118                                 err = -errno;
1119                                 goto fail_child;
1120                         }
1121
1122                         /* Now ask the question. */
1123                         if (!(line = exec_command_line(argv))) {
1124                                 err = -ENOMEM;
1125                                 r = EXIT_MEMORY;
1126                                 goto fail_child;
1127                         }
1128
1129                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1130                         free(line);
1131
1132                         if (r < 0 || response == 'n') {
1133                                 err = -ECANCELED;
1134                                 r = EXIT_CONFIRM;
1135                                 goto fail_child;
1136                         } else if (response == 's') {
1137                                 err = r = 0;
1138                                 goto fail_child;
1139                         }
1140
1141                         /* Release terminal for the question */
1142                         if ((r = restore_confirm_stdio(context,
1143                                                        &saved_stdin, &saved_stdout,
1144                                                        &keep_stdin, &keep_stdout))) {
1145                                 err = -errno;
1146                                 goto fail_child;
1147                         }
1148                 }
1149
1150                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1151                  * must sure to drop O_NONBLOCK */
1152                 if (socket_fd >= 0)
1153                         fd_nonblock(socket_fd, false);
1154
1155                 if (!keep_stdin) {
1156                         err = setup_input(context, socket_fd, apply_tty_stdin);
1157                         if (err < 0) {
1158                                 r = EXIT_STDIN;
1159                                 goto fail_child;
1160                         }
1161                 }
1162
1163                 if (!keep_stdout) {
1164                         err = setup_output(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1165                         if (err < 0) {
1166                                 r = EXIT_STDOUT;
1167                                 goto fail_child;
1168                         }
1169                 }
1170
1171                 err = setup_error(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1172                 if (err < 0) {
1173                         r = EXIT_STDERR;
1174                         goto fail_child;
1175                 }
1176
1177                 if (cgroup_bondings) {
1178                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1179                         if (err < 0) {
1180                                 r = EXIT_CGROUP;
1181                                 goto fail_child;
1182                         }
1183                 }
1184
1185                 if (context->oom_score_adjust_set) {
1186                         char t[16];
1187
1188                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1189                         char_array_0(t);
1190
1191                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1192                                 /* Compatibility with Linux <= 2.6.35 */
1193
1194                                 int adj;
1195
1196                                 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1197                                 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1198
1199                                 snprintf(t, sizeof(t), "%i", adj);
1200                                 char_array_0(t);
1201
1202                                 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1203                                     && errno != EACCES) {
1204                                         err = -errno;
1205                                         r = EXIT_OOM_ADJUST;
1206                                         goto fail_child;
1207                                 }
1208                         }
1209                 }
1210
1211                 if (context->nice_set)
1212                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1213                                 err = -errno;
1214                                 r = EXIT_NICE;
1215                                 goto fail_child;
1216                         }
1217
1218                 if (context->cpu_sched_set) {
1219                         struct sched_param param;
1220
1221                         zero(param);
1222                         param.sched_priority = context->cpu_sched_priority;
1223
1224                         if (sched_setscheduler(0, context->cpu_sched_policy |
1225                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1226                                 err = -errno;
1227                                 r = EXIT_SETSCHEDULER;
1228                                 goto fail_child;
1229                         }
1230                 }
1231
1232                 if (context->cpuset)
1233                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1234                                 err = -errno;
1235                                 r = EXIT_CPUAFFINITY;
1236                                 goto fail_child;
1237                         }
1238
1239                 if (context->ioprio_set)
1240                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1241                                 err = -errno;
1242                                 r = EXIT_IOPRIO;
1243                                 goto fail_child;
1244                         }
1245
1246                 if (context->timer_slack_nsec_set)
1247                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1248                                 err = -errno;
1249                                 r = EXIT_TIMERSLACK;
1250                                 goto fail_child;
1251                         }
1252
1253                 if (context->utmp_id)
1254                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1255
1256                 if (context->user) {
1257                         username = context->user;
1258                         err = get_user_creds(&username, &uid, &gid, &home);
1259                         if (err < 0) {
1260                                 r = EXIT_USER;
1261                                 goto fail_child;
1262                         }
1263
1264                         if (is_terminal_input(context->std_input)) {
1265                                 err = chown_terminal(STDIN_FILENO, uid);
1266                                 if (err < 0) {
1267                                         r = EXIT_STDIN;
1268                                         goto fail_child;
1269                                 }
1270                         }
1271
1272                         if (cgroup_bondings && context->control_group_modify) {
1273                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1274                                 if (err >= 0)
1275                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1276                                 if (err < 0) {
1277                                         r = EXIT_CGROUP;
1278                                         goto fail_child;
1279                                 }
1280
1281                                 set_access = true;
1282                         }
1283                 }
1284
1285                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1286                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1287                         if (err < 0) {
1288                                 r = EXIT_CGROUP;
1289                                 goto fail_child;
1290                         }
1291                 }
1292
1293                 if (apply_permissions) {
1294                         err = enforce_groups(context, username, gid);
1295                         if (err < 0) {
1296                                 r = EXIT_GROUP;
1297                                 goto fail_child;
1298                         }
1299                 }
1300
1301                 umask(context->umask);
1302
1303 #ifdef HAVE_PAM
1304                 if (context->pam_name && username) {
1305                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1306                         if (err < 0) {
1307                                 r = EXIT_PAM;
1308                                 goto fail_child;
1309                         }
1310                 }
1311 #endif
1312                 if (context->private_network) {
1313                         if (unshare(CLONE_NEWNET) < 0) {
1314                                 err = -errno;
1315                                 r = EXIT_NETWORK;
1316                                 goto fail_child;
1317                         }
1318
1319                         loopback_setup();
1320                 }
1321
1322                 if (strv_length(context->read_write_dirs) > 0 ||
1323                     strv_length(context->read_only_dirs) > 0 ||
1324                     strv_length(context->inaccessible_dirs) > 0 ||
1325                     context->mount_flags != MS_SHARED ||
1326                     context->private_tmp) {
1327                         err = setup_namespace(context->read_write_dirs,
1328                                               context->read_only_dirs,
1329                                               context->inaccessible_dirs,
1330                                               context->private_tmp,
1331                                               context->mount_flags);
1332                         if (err < 0) {
1333                                 r = EXIT_NAMESPACE;
1334                                 goto fail_child;
1335                         }
1336                 }
1337
1338                 if (apply_chroot) {
1339                         if (context->root_directory)
1340                                 if (chroot(context->root_directory) < 0) {
1341                                         err = -errno;
1342                                         r = EXIT_CHROOT;
1343                                         goto fail_child;
1344                                 }
1345
1346                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1347                                 err = -errno;
1348                                 r = EXIT_CHDIR;
1349                                 goto fail_child;
1350                         }
1351                 } else {
1352
1353                         char *d;
1354
1355                         if (asprintf(&d, "%s/%s",
1356                                      context->root_directory ? context->root_directory : "",
1357                                      context->working_directory ? context->working_directory : "") < 0) {
1358                                 err = -ENOMEM;
1359                                 r = EXIT_MEMORY;
1360                                 goto fail_child;
1361                         }
1362
1363                         if (chdir(d) < 0) {
1364                                 err = -errno;
1365                                 free(d);
1366                                 r = EXIT_CHDIR;
1367                                 goto fail_child;
1368                         }
1369
1370                         free(d);
1371                 }
1372
1373                 /* We repeat the fd closing here, to make sure that
1374                  * nothing is leaked from the PAM modules */
1375                 err = close_all_fds(fds, n_fds);
1376                 if (err >= 0)
1377                         err = shift_fds(fds, n_fds);
1378                 if (err >= 0)
1379                         err = flags_fds(fds, n_fds, context->non_blocking);
1380                 if (err < 0) {
1381                         r = EXIT_FDS;
1382                         goto fail_child;
1383                 }
1384
1385                 if (apply_permissions) {
1386
1387                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1388                                 if (!context->rlimit[i])
1389                                         continue;
1390
1391                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1392                                         err = -errno;
1393                                         r = EXIT_LIMITS;
1394                                         goto fail_child;
1395                                 }
1396                         }
1397
1398                         if (context->capability_bounding_set_drop) {
1399                                 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1400                                 if (err < 0) {
1401                                         r = EXIT_CAPABILITIES;
1402                                         goto fail_child;
1403                                 }
1404                         }
1405
1406                         if (context->user) {
1407                                 err = enforce_user(context, uid);
1408                                 if (err < 0) {
1409                                         r = EXIT_USER;
1410                                         goto fail_child;
1411                                 }
1412                         }
1413
1414                         /* PR_GET_SECUREBITS is not privileged, while
1415                          * PR_SET_SECUREBITS is. So to suppress
1416                          * potential EPERMs we'll try not to call
1417                          * PR_SET_SECUREBITS unless necessary. */
1418                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1419                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1420                                         err = -errno;
1421                                         r = EXIT_SECUREBITS;
1422                                         goto fail_child;
1423                                 }
1424
1425                         if (context->capabilities)
1426                                 if (cap_set_proc(context->capabilities) < 0) {
1427                                         err = -errno;
1428                                         r = EXIT_CAPABILITIES;
1429                                         goto fail_child;
1430                                 }
1431                 }
1432
1433                 if (!(our_env = new0(char*, 7))) {
1434                         err = -ENOMEM;
1435                         r = EXIT_MEMORY;
1436                         goto fail_child;
1437                 }
1438
1439                 if (n_fds > 0)
1440                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1441                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1442                                 err = -ENOMEM;
1443                                 r = EXIT_MEMORY;
1444                                 goto fail_child;
1445                         }
1446
1447                 if (home)
1448                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1449                                 err = -ENOMEM;
1450                                 r = EXIT_MEMORY;
1451                                 goto fail_child;
1452                         }
1453
1454                 if (username)
1455                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1456                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1457                                 err = -ENOMEM;
1458                                 r = EXIT_MEMORY;
1459                                 goto fail_child;
1460                         }
1461
1462                 if (is_terminal_input(context->std_input) ||
1463                     context->std_output == EXEC_OUTPUT_TTY ||
1464                     context->std_error == EXEC_OUTPUT_TTY)
1465                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1466                                 err = -ENOMEM;
1467                                 r = EXIT_MEMORY;
1468                                 goto fail_child;
1469                         }
1470
1471                 assert(n_env <= 7);
1472
1473                 if (!(final_env = strv_env_merge(
1474                                       5,
1475                                       environment,
1476                                       our_env,
1477                                       context->environment,
1478                                       files_env,
1479                                       pam_env,
1480                                       NULL))) {
1481                         err = -ENOMEM;
1482                         r = EXIT_MEMORY;
1483                         goto fail_child;
1484                 }
1485
1486                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1487                         err = -ENOMEM;
1488                         r = EXIT_MEMORY;
1489                         goto fail_child;
1490                 }
1491
1492                 final_env = strv_env_clean(final_env);
1493
1494                 execve(command->path, final_argv, final_env);
1495                 err = -errno;
1496                 r = EXIT_EXEC;
1497
1498         fail_child:
1499                 if (r != 0) {
1500                         log_open();
1501                         log_warning("Failed at step %s spawning %s: %s",
1502                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1503                                     command->path, strerror(-err));
1504                 }
1505
1506                 strv_free(our_env);
1507                 strv_free(final_env);
1508                 strv_free(pam_env);
1509                 strv_free(files_env);
1510                 strv_free(final_argv);
1511
1512                 if (saved_stdin >= 0)
1513                         close_nointr_nofail(saved_stdin);
1514
1515                 if (saved_stdout >= 0)
1516                         close_nointr_nofail(saved_stdout);
1517
1518                 _exit(r);
1519         }
1520
1521         strv_free(files_env);
1522
1523         /* We add the new process to the cgroup both in the child (so
1524          * that we can be sure that no user code is ever executed
1525          * outside of the cgroup) and in the parent (so that we can be
1526          * sure that when we kill the cgroup the process will be
1527          * killed too). */
1528         if (cgroup_bondings)
1529                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1530
1531         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1532
1533         exec_status_start(&command->exec_status, pid);
1534
1535         *ret = pid;
1536         return 0;
1537
1538 fail_parent:
1539         strv_free(files_env);
1540
1541         return r;
1542 }
1543
1544 void exec_context_init(ExecContext *c) {
1545         assert(c);
1546
1547         c->umask = 0022;
1548         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1549         c->cpu_sched_policy = SCHED_OTHER;
1550         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1551         c->syslog_level_prefix = true;
1552         c->mount_flags = MS_SHARED;
1553         c->kill_signal = SIGTERM;
1554         c->send_sigkill = true;
1555         c->control_group_persistent = -1;
1556         c->ignore_sigpipe = true;
1557 }
1558
1559 void exec_context_done(ExecContext *c) {
1560         unsigned l;
1561
1562         assert(c);
1563
1564         strv_free(c->environment);
1565         c->environment = NULL;
1566
1567         strv_free(c->environment_files);
1568         c->environment_files = NULL;
1569
1570         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1571                 free(c->rlimit[l]);
1572                 c->rlimit[l] = NULL;
1573         }
1574
1575         free(c->working_directory);
1576         c->working_directory = NULL;
1577         free(c->root_directory);
1578         c->root_directory = NULL;
1579
1580         free(c->tty_path);
1581         c->tty_path = NULL;
1582
1583         free(c->tcpwrap_name);
1584         c->tcpwrap_name = NULL;
1585
1586         free(c->syslog_identifier);
1587         c->syslog_identifier = NULL;
1588
1589         free(c->user);
1590         c->user = NULL;
1591
1592         free(c->group);
1593         c->group = NULL;
1594
1595         strv_free(c->supplementary_groups);
1596         c->supplementary_groups = NULL;
1597
1598         free(c->pam_name);
1599         c->pam_name = NULL;
1600
1601         if (c->capabilities) {
1602                 cap_free(c->capabilities);
1603                 c->capabilities = NULL;
1604         }
1605
1606         strv_free(c->read_only_dirs);
1607         c->read_only_dirs = NULL;
1608
1609         strv_free(c->read_write_dirs);
1610         c->read_write_dirs = NULL;
1611
1612         strv_free(c->inaccessible_dirs);
1613         c->inaccessible_dirs = NULL;
1614
1615         if (c->cpuset)
1616                 CPU_FREE(c->cpuset);
1617
1618         free(c->utmp_id);
1619         c->utmp_id = NULL;
1620 }
1621
1622 void exec_command_done(ExecCommand *c) {
1623         assert(c);
1624
1625         free(c->path);
1626         c->path = NULL;
1627
1628         strv_free(c->argv);
1629         c->argv = NULL;
1630 }
1631
1632 void exec_command_done_array(ExecCommand *c, unsigned n) {
1633         unsigned i;
1634
1635         for (i = 0; i < n; i++)
1636                 exec_command_done(c+i);
1637 }
1638
1639 void exec_command_free_list(ExecCommand *c) {
1640         ExecCommand *i;
1641
1642         while ((i = c)) {
1643                 LIST_REMOVE(ExecCommand, command, c, i);
1644                 exec_command_done(i);
1645                 free(i);
1646         }
1647 }
1648
1649 void exec_command_free_array(ExecCommand **c, unsigned n) {
1650         unsigned i;
1651
1652         for (i = 0; i < n; i++) {
1653                 exec_command_free_list(c[i]);
1654                 c[i] = NULL;
1655         }
1656 }
1657
1658 int exec_context_load_environment(const ExecContext *c, char ***l) {
1659         char **i, **r = NULL;
1660
1661         assert(c);
1662         assert(l);
1663
1664         STRV_FOREACH(i, c->environment_files) {
1665                 char *fn;
1666                 int k;
1667                 bool ignore = false;
1668                 char **p;
1669
1670                 fn = *i;
1671
1672                 if (fn[0] == '-') {
1673                         ignore = true;
1674                         fn ++;
1675                 }
1676
1677                 if (!path_is_absolute(fn)) {
1678
1679                         if (ignore)
1680                                 continue;
1681
1682                         strv_free(r);
1683                         return -EINVAL;
1684                 }
1685
1686                 if ((k = load_env_file(fn, &p)) < 0) {
1687
1688                         if (ignore)
1689                                 continue;
1690
1691                         strv_free(r);
1692                         return k;
1693                 }
1694
1695                 if (r == NULL)
1696                         r = p;
1697                 else {
1698                         char **m;
1699
1700                         m = strv_env_merge(2, r, p);
1701                         strv_free(r);
1702                         strv_free(p);
1703
1704                         if (!m)
1705                                 return -ENOMEM;
1706
1707                         r = m;
1708                 }
1709         }
1710
1711         *l = r;
1712
1713         return 0;
1714 }
1715
1716 static void strv_fprintf(FILE *f, char **l) {
1717         char **g;
1718
1719         assert(f);
1720
1721         STRV_FOREACH(g, l)
1722                 fprintf(f, " %s", *g);
1723 }
1724
1725 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1726         char ** e;
1727         unsigned i;
1728
1729         assert(c);
1730         assert(f);
1731
1732         if (!prefix)
1733                 prefix = "";
1734
1735         fprintf(f,
1736                 "%sUMask: %04o\n"
1737                 "%sWorkingDirectory: %s\n"
1738                 "%sRootDirectory: %s\n"
1739                 "%sNonBlocking: %s\n"
1740                 "%sPrivateTmp: %s\n"
1741                 "%sControlGroupModify: %s\n"
1742                 "%sControlGroupPersistent: %s\n"
1743                 "%sPrivateNetwork: %s\n",
1744                 prefix, c->umask,
1745                 prefix, c->working_directory ? c->working_directory : "/",
1746                 prefix, c->root_directory ? c->root_directory : "/",
1747                 prefix, yes_no(c->non_blocking),
1748                 prefix, yes_no(c->private_tmp),
1749                 prefix, yes_no(c->control_group_modify),
1750                 prefix, yes_no(c->control_group_persistent),
1751                 prefix, yes_no(c->private_network));
1752
1753         STRV_FOREACH(e, c->environment)
1754                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1755
1756         STRV_FOREACH(e, c->environment_files)
1757                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1758
1759         if (c->tcpwrap_name)
1760                 fprintf(f,
1761                         "%sTCPWrapName: %s\n",
1762                         prefix, c->tcpwrap_name);
1763
1764         if (c->nice_set)
1765                 fprintf(f,
1766                         "%sNice: %i\n",
1767                         prefix, c->nice);
1768
1769         if (c->oom_score_adjust_set)
1770                 fprintf(f,
1771                         "%sOOMScoreAdjust: %i\n",
1772                         prefix, c->oom_score_adjust);
1773
1774         for (i = 0; i < RLIM_NLIMITS; i++)
1775                 if (c->rlimit[i])
1776                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1777
1778         if (c->ioprio_set)
1779                 fprintf(f,
1780                         "%sIOSchedulingClass: %s\n"
1781                         "%sIOPriority: %i\n",
1782                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1783                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1784
1785         if (c->cpu_sched_set)
1786                 fprintf(f,
1787                         "%sCPUSchedulingPolicy: %s\n"
1788                         "%sCPUSchedulingPriority: %i\n"
1789                         "%sCPUSchedulingResetOnFork: %s\n",
1790                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1791                         prefix, c->cpu_sched_priority,
1792                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1793
1794         if (c->cpuset) {
1795                 fprintf(f, "%sCPUAffinity:", prefix);
1796                 for (i = 0; i < c->cpuset_ncpus; i++)
1797                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1798                                 fprintf(f, " %i", i);
1799                 fputs("\n", f);
1800         }
1801
1802         if (c->timer_slack_nsec_set)
1803                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1804
1805         fprintf(f,
1806                 "%sStandardInput: %s\n"
1807                 "%sStandardOutput: %s\n"
1808                 "%sStandardError: %s\n",
1809                 prefix, exec_input_to_string(c->std_input),
1810                 prefix, exec_output_to_string(c->std_output),
1811                 prefix, exec_output_to_string(c->std_error));
1812
1813         if (c->tty_path)
1814                 fprintf(f,
1815                         "%sTTYPath: %s\n"
1816                         "%sTTYReset: %s\n"
1817                         "%sTTYVHangup: %s\n"
1818                         "%sTTYVTDisallocate: %s\n",
1819                         prefix, c->tty_path,
1820                         prefix, yes_no(c->tty_reset),
1821                         prefix, yes_no(c->tty_vhangup),
1822                         prefix, yes_no(c->tty_vt_disallocate));
1823
1824         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1825             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1826             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1827             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1828                 fprintf(f,
1829                         "%sSyslogFacility: %s\n"
1830                         "%sSyslogLevel: %s\n",
1831                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1832                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1833
1834         if (c->capabilities) {
1835                 char *t;
1836                 if ((t = cap_to_text(c->capabilities, NULL))) {
1837                         fprintf(f, "%sCapabilities: %s\n",
1838                                 prefix, t);
1839                         cap_free(t);
1840                 }
1841         }
1842
1843         if (c->secure_bits)
1844                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1845                         prefix,
1846                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1847                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1848                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1849                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1850                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1851                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1852
1853         if (c->capability_bounding_set_drop) {
1854                 unsigned long l;
1855                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1856
1857                 for (l = 0; l <= cap_last_cap(); l++)
1858                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1859                                 char *t;
1860
1861                                 if ((t = cap_to_name(l))) {
1862                                         fprintf(f, " %s", t);
1863                                         cap_free(t);
1864                                 }
1865                         }
1866
1867                 fputs("\n", f);
1868         }
1869
1870         if (c->user)
1871                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1872         if (c->group)
1873                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1874
1875         if (strv_length(c->supplementary_groups) > 0) {
1876                 fprintf(f, "%sSupplementaryGroups:", prefix);
1877                 strv_fprintf(f, c->supplementary_groups);
1878                 fputs("\n", f);
1879         }
1880
1881         if (c->pam_name)
1882                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1883
1884         if (strv_length(c->read_write_dirs) > 0) {
1885                 fprintf(f, "%sReadWriteDirs:", prefix);
1886                 strv_fprintf(f, c->read_write_dirs);
1887                 fputs("\n", f);
1888         }
1889
1890         if (strv_length(c->read_only_dirs) > 0) {
1891                 fprintf(f, "%sReadOnlyDirs:", prefix);
1892                 strv_fprintf(f, c->read_only_dirs);
1893                 fputs("\n", f);
1894         }
1895
1896         if (strv_length(c->inaccessible_dirs) > 0) {
1897                 fprintf(f, "%sInaccessibleDirs:", prefix);
1898                 strv_fprintf(f, c->inaccessible_dirs);
1899                 fputs("\n", f);
1900         }
1901
1902         fprintf(f,
1903                 "%sKillMode: %s\n"
1904                 "%sKillSignal: SIG%s\n"
1905                 "%sSendSIGKILL: %s\n"
1906                 "%sIgnoreSIGPIPE: %s\n",
1907                 prefix, kill_mode_to_string(c->kill_mode),
1908                 prefix, signal_to_string(c->kill_signal),
1909                 prefix, yes_no(c->send_sigkill),
1910                 prefix, yes_no(c->ignore_sigpipe));
1911
1912         if (c->utmp_id)
1913                 fprintf(f,
1914                         "%sUtmpIdentifier: %s\n",
1915                         prefix, c->utmp_id);
1916 }
1917
1918 void exec_status_start(ExecStatus *s, pid_t pid) {
1919         assert(s);
1920
1921         zero(*s);
1922         s->pid = pid;
1923         dual_timestamp_get(&s->start_timestamp);
1924 }
1925
1926 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1927         assert(s);
1928
1929         if (s->pid && s->pid != pid)
1930                 zero(*s);
1931
1932         s->pid = pid;
1933         dual_timestamp_get(&s->exit_timestamp);
1934
1935         s->code = code;
1936         s->status = status;
1937
1938         if (context) {
1939                 if (context->utmp_id)
1940                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1941
1942                 exec_context_tty_reset(context);
1943         }
1944 }
1945
1946 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1947         char buf[FORMAT_TIMESTAMP_MAX];
1948
1949         assert(s);
1950         assert(f);
1951
1952         if (!prefix)
1953                 prefix = "";
1954
1955         if (s->pid <= 0)
1956                 return;
1957
1958         fprintf(f,
1959                 "%sPID: %lu\n",
1960                 prefix, (unsigned long) s->pid);
1961
1962         if (s->start_timestamp.realtime > 0)
1963                 fprintf(f,
1964                         "%sStart Timestamp: %s\n",
1965                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1966
1967         if (s->exit_timestamp.realtime > 0)
1968                 fprintf(f,
1969                         "%sExit Timestamp: %s\n"
1970                         "%sExit Code: %s\n"
1971                         "%sExit Status: %i\n",
1972                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1973                         prefix, sigchld_code_to_string(s->code),
1974                         prefix, s->status);
1975 }
1976
1977 char *exec_command_line(char **argv) {
1978         size_t k;
1979         char *n, *p, **a;
1980         bool first = true;
1981
1982         assert(argv);
1983
1984         k = 1;
1985         STRV_FOREACH(a, argv)
1986                 k += strlen(*a)+3;
1987
1988         if (!(n = new(char, k)))
1989                 return NULL;
1990
1991         p = n;
1992         STRV_FOREACH(a, argv) {
1993
1994                 if (!first)
1995                         *(p++) = ' ';
1996                 else
1997                         first = false;
1998
1999                 if (strpbrk(*a, WHITESPACE)) {
2000                         *(p++) = '\'';
2001                         p = stpcpy(p, *a);
2002                         *(p++) = '\'';
2003                 } else
2004                         p = stpcpy(p, *a);
2005
2006         }
2007
2008         *p = 0;
2009
2010         /* FIXME: this doesn't really handle arguments that have
2011          * spaces and ticks in them */
2012
2013         return n;
2014 }
2015
2016 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2017         char *p2;
2018         const char *prefix2;
2019
2020         char *cmd;
2021
2022         assert(c);
2023         assert(f);
2024
2025         if (!prefix)
2026                 prefix = "";
2027         p2 = strappend(prefix, "\t");
2028         prefix2 = p2 ? p2 : prefix;
2029
2030         cmd = exec_command_line(c->argv);
2031
2032         fprintf(f,
2033                 "%sCommand Line: %s\n",
2034                 prefix, cmd ? cmd : strerror(ENOMEM));
2035
2036         free(cmd);
2037
2038         exec_status_dump(&c->exec_status, f, prefix2);
2039
2040         free(p2);
2041 }
2042
2043 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2044         assert(f);
2045
2046         if (!prefix)
2047                 prefix = "";
2048
2049         LIST_FOREACH(command, c, c)
2050                 exec_command_dump(c, f, prefix);
2051 }
2052
2053 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2054         ExecCommand *end;
2055
2056         assert(l);
2057         assert(e);
2058
2059         if (*l) {
2060                 /* It's kind of important, that we keep the order here */
2061                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2062                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2063         } else
2064               *l = e;
2065 }
2066
2067 int exec_command_set(ExecCommand *c, const char *path, ...) {
2068         va_list ap;
2069         char **l, *p;
2070
2071         assert(c);
2072         assert(path);
2073
2074         va_start(ap, path);
2075         l = strv_new_ap(path, ap);
2076         va_end(ap);
2077
2078         if (!l)
2079                 return -ENOMEM;
2080
2081         if (!(p = strdup(path))) {
2082                 strv_free(l);
2083                 return -ENOMEM;
2084         }
2085
2086         free(c->path);
2087         c->path = p;
2088
2089         strv_free(c->argv);
2090         c->argv = l;
2091
2092         return 0;
2093 }
2094
2095 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2096         [EXEC_INPUT_NULL] = "null",
2097         [EXEC_INPUT_TTY] = "tty",
2098         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2099         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2100         [EXEC_INPUT_SOCKET] = "socket"
2101 };
2102
2103 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2104
2105 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2106         [EXEC_OUTPUT_INHERIT] = "inherit",
2107         [EXEC_OUTPUT_NULL] = "null",
2108         [EXEC_OUTPUT_TTY] = "tty",
2109         [EXEC_OUTPUT_SYSLOG] = "syslog",
2110         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2111         [EXEC_OUTPUT_KMSG] = "kmsg",
2112         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2113         [EXEC_OUTPUT_JOURNAL] = "journal",
2114         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2115         [EXEC_OUTPUT_SOCKET] = "socket"
2116 };
2117
2118 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2119
2120 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2121         [KILL_CONTROL_GROUP] = "control-group",
2122         [KILL_PROCESS] = "process",
2123         [KILL_NONE] = "none"
2124 };
2125
2126 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2127
2128 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2129         [KILL_MAIN] = "main",
2130         [KILL_CONTROL] = "control",
2131         [KILL_ALL] = "all"
2132 };
2133
2134 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);