chiark / gitweb /
953cfa2baa30d795677a46cc0cd343b0ab65e9a1
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41
42 #ifdef HAVE_PAM
43 #include <security/pam_appl.h>
44 #endif
45
46 #include "execute.h"
47 #include "strv.h"
48 #include "macro.h"
49 #include "capability.h"
50 #include "util.h"
51 #include "log.h"
52 #include "ioprio.h"
53 #include "securebits.h"
54 #include "cgroup.h"
55 #include "namespace.h"
56 #include "tcpwrap.h"
57 #include "exit-status.h"
58 #include "missing.h"
59 #include "utmp-wtmp.h"
60 #include "def.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
63
64 /* This assumes there is a 'tty' group */
65 #define TTY_MODE 0620
66
/* Relocates the passed descriptors so that fds[i] ends up as descriptor
 * number i+3, i.e. directly after stdin/stdout/stderr.
 *
 * The array is modified in place: each entry is replaced by the number
 * of its duplicate and the previous descriptor is closed.
 *
 * Returns 0 on success, or a negative errno value if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int count = (int) n_fds;
        int first = 0, retry;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry = -1;

                for (idx = first; idx < count; idx++) {
                        int want = idx + 3;
                        int got;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* The wanted slot was still occupied, so we
                         * got a higher number. Remember the first
                         * index where that happened and make another
                         * pass starting from there. */
                        if (got != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (retry >= 0);

        return 0;
}
110
/* Applies the requested O_NONBLOCK setting to every passed descriptor
 * and clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors
                 * are meant to be handed over to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
137
138 static const char *tty_path(const ExecContext *context) {
139         assert(context);
140
141         if (context->tty_path)
142                 return context->tty_path;
143
144         return "/dev/console";
145 }
146
147 void exec_context_tty_reset(const ExecContext *context) {
148         assert(context);
149
150         if (context->tty_vhangup)
151                 terminal_vhangup(tty_path(context));
152
153         if (context->tty_reset)
154                 reset_terminal(tty_path(context));
155
156         if (context->tty_vt_disallocate && context->tty_path)
157                 vt_disallocate(context->tty_path);
158 }
159
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and
 * makes it available as descriptor number nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky: it already landed on the requested number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return result;
}
176
177 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
178         int fd, r;
179         union sockaddr_union sa;
180
181         assert(context);
182         assert(output < _EXEC_OUTPUT_MAX);
183         assert(ident);
184         assert(nfd >= 0);
185
186         fd = socket(AF_UNIX, SOCK_STREAM, 0);
187         if (fd < 0)
188                 return -errno;
189
190         zero(sa);
191         sa.un.sun_family = AF_UNIX;
192         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
193
194         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
195         if (r < 0) {
196                 close_nointr_nofail(fd);
197                 return -errno;
198         }
199
200         if (shutdown(fd, SHUT_RD) < 0) {
201                 close_nointr_nofail(fd);
202                 return -errno;
203         }
204
205         dprintf(fd,
206                 "%s\n"
207                 "%i\n"
208                 "%i\n"
209                 "%i\n"
210                 "%i\n"
211                 "%i\n",
212                 context->syslog_identifier ? context->syslog_identifier : ident,
213                 context->syslog_priority,
214                 !!context->syslog_level_prefix,
215                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
216                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
217                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
218
219         if (fd != nfd) {
220                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
221                 close_nointr_nofail(fd);
222         } else
223                 r = nfd;
224
225         return r;
226 }
/* Opens the terminal at 'path' via open_terminal(), passing 'mode'
 * or'ed with O_NOCTTY, and makes it available as descriptor number nfd.
 *
 * Returns nfd on success; otherwise the negative value returned by
 * open_terminal() or a negative errno value from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        result = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(term_fd);

        return result;
}
244
245 static bool is_terminal_input(ExecInput i) {
246         return
247                 i == EXEC_INPUT_TTY ||
248                 i == EXEC_INPUT_TTY_FORCE ||
249                 i == EXEC_INPUT_TTY_FAIL;
250 }
251
252 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
253
254         if (is_terminal_input(std_input) && !apply_tty_stdin)
255                 return EXEC_INPUT_NULL;
256
257         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
258                 return EXEC_INPUT_NULL;
259
260         return std_input;
261 }
262
263 static int fixup_output(ExecOutput std_output, int socket_fd) {
264
265         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
266                 return EXEC_OUTPUT_INHERIT;
267
268         return std_output;
269 }
270
271 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
272         ExecInput i;
273
274         assert(context);
275
276         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
277
278         switch (i) {
279
280         case EXEC_INPUT_NULL:
281                 return open_null_as(O_RDONLY, STDIN_FILENO);
282
283         case EXEC_INPUT_TTY:
284         case EXEC_INPUT_TTY_FORCE:
285         case EXEC_INPUT_TTY_FAIL: {
286                 int fd, r;
287
288                 if ((fd = acquire_terminal(
289                                      tty_path(context),
290                                      i == EXEC_INPUT_TTY_FAIL,
291                                      i == EXEC_INPUT_TTY_FORCE,
292                                      false)) < 0)
293                         return fd;
294
295                 if (fd != STDIN_FILENO) {
296                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
297                         close_nointr_nofail(fd);
298                 } else
299                         r = STDIN_FILENO;
300
301                 return r;
302         }
303
304         case EXEC_INPUT_SOCKET:
305                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306
307         default:
308                 assert_not_reached("Unknown input type");
309         }
310 }
311
312 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
313         ExecOutput o;
314         ExecInput i;
315
316         assert(context);
317         assert(ident);
318
319         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
320         o = fixup_output(context->std_output, socket_fd);
321
322         /* This expects the input is already set up */
323
324         switch (o) {
325
326         case EXEC_OUTPUT_INHERIT:
327
328                 /* If input got downgraded, inherit the original value */
329                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
330                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
331
332                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
333                 if (i != EXEC_INPUT_NULL)
334                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
335
336                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
337                 if (getppid() != 1)
338                         return STDOUT_FILENO;
339
340                 /* We need to open /dev/null here anew, to get the
341                  * right access mode. So we fall through */
342
343         case EXEC_OUTPUT_NULL:
344                 return open_null_as(O_WRONLY, STDOUT_FILENO);
345
346         case EXEC_OUTPUT_TTY:
347                 if (is_terminal_input(i))
348                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
349
350                 /* We don't reset the terminal if this is just about output */
351                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
352
353         case EXEC_OUTPUT_SYSLOG:
354         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
355         case EXEC_OUTPUT_KMSG:
356         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
357         case EXEC_OUTPUT_JOURNAL:
358         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
359                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
360
361         case EXEC_OUTPUT_SOCKET:
362                 assert(socket_fd >= 0);
363                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
364
365         default:
366                 assert_not_reached("Unknown output type");
367         }
368 }
369
370 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
371         ExecOutput o, e;
372         ExecInput i;
373
374         assert(context);
375         assert(ident);
376
377         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
378         o = fixup_output(context->std_output, socket_fd);
379         e = fixup_output(context->std_error, socket_fd);
380
381         /* This expects the input and output are already set up */
382
383         /* Don't change the stderr file descriptor if we inherit all
384          * the way and are not on a tty */
385         if (e == EXEC_OUTPUT_INHERIT &&
386             o == EXEC_OUTPUT_INHERIT &&
387             i == EXEC_INPUT_NULL &&
388             !is_terminal_input(context->std_input) &&
389             getppid () != 1)
390                 return STDERR_FILENO;
391
392         /* Duplicate from stdout if possible */
393         if (e == o || e == EXEC_OUTPUT_INHERIT)
394                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
395
396         switch (e) {
397
398         case EXEC_OUTPUT_NULL:
399                 return open_null_as(O_WRONLY, STDERR_FILENO);
400
401         case EXEC_OUTPUT_TTY:
402                 if (is_terminal_input(i))
403                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
404
405                 /* We don't reset the terminal if this is just about output */
406                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
407
408         case EXEC_OUTPUT_SYSLOG:
409         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
410         case EXEC_OUTPUT_KMSG:
411         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
412         case EXEC_OUTPUT_JOURNAL:
413         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
414                 return connect_logger_as(context, e, ident, STDERR_FILENO);
415
416         case EXEC_OUTPUT_SOCKET:
417                 assert(socket_fd >= 0);
418                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
419
420         default:
421                 assert_not_reached("Unknown error type");
422         }
423 }
424
425 static int chown_terminal(int fd, uid_t uid) {
426         struct stat st;
427
428         assert(fd >= 0);
429
430         /* This might fail. What matters are the results. */
431         (void) fchown(fd, uid, -1);
432         (void) fchmod(fd, TTY_MODE);
433
434         if (fstat(fd, &st) < 0)
435                 return -errno;
436
437         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
438                 return -EPERM;
439
440         return 0;
441 }
442
443 static int setup_confirm_stdio(const ExecContext *context,
444                                int *_saved_stdin,
445                                int *_saved_stdout) {
446         int fd = -1, saved_stdin, saved_stdout = -1, r;
447
448         assert(context);
449         assert(_saved_stdin);
450         assert(_saved_stdout);
451
452         /* This returns positive EXIT_xxx return values instead of
453          * negative errno style values! */
454
455         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
456                 return EXIT_STDIN;
457
458         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
459                 r = EXIT_STDOUT;
460                 goto fail;
461         }
462
463         if ((fd = acquire_terminal(
464                              tty_path(context),
465                              context->std_input == EXEC_INPUT_TTY_FAIL,
466                              context->std_input == EXEC_INPUT_TTY_FORCE,
467                              false)) < 0) {
468                 r = EXIT_STDIN;
469                 goto fail;
470         }
471
472         if (chown_terminal(fd, getuid()) < 0) {
473                 r = EXIT_STDIN;
474                 goto fail;
475         }
476
477         if (dup2(fd, STDIN_FILENO) < 0) {
478                 r = EXIT_STDIN;
479                 goto fail;
480         }
481
482         if (dup2(fd, STDOUT_FILENO) < 0) {
483                 r = EXIT_STDOUT;
484                 goto fail;
485         }
486
487         if (fd >= 2)
488                 close_nointr_nofail(fd);
489
490         *_saved_stdin = saved_stdin;
491         *_saved_stdout = saved_stdout;
492
493         return 0;
494
495 fail:
496         if (saved_stdout >= 0)
497                 close_nointr_nofail(saved_stdout);
498
499         if (saved_stdin >= 0)
500                 close_nointr_nofail(saved_stdin);
501
502         if (fd >= 0)
503                 close_nointr_nofail(fd);
504
505         return r;
506 }
507
508 static int restore_confirm_stdio(const ExecContext *context,
509                                  int *saved_stdin,
510                                  int *saved_stdout,
511                                  bool *keep_stdin,
512                                  bool *keep_stdout) {
513
514         assert(context);
515         assert(saved_stdin);
516         assert(*saved_stdin >= 0);
517         assert(saved_stdout);
518         assert(*saved_stdout >= 0);
519
520         /* This returns positive EXIT_xxx return values instead of
521          * negative errno style values! */
522
523         if (is_terminal_input(context->std_input)) {
524
525                 /* The service wants terminal input. */
526
527                 *keep_stdin = true;
528                 *keep_stdout =
529                         context->std_output == EXEC_OUTPUT_INHERIT ||
530                         context->std_output == EXEC_OUTPUT_TTY;
531
532         } else {
533                 /* If the service doesn't want a controlling terminal,
534                  * then we need to get rid entirely of what we have
535                  * already. */
536
537                 if (release_terminal() < 0)
538                         return EXIT_STDIN;
539
540                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
541                         return EXIT_STDIN;
542
543                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
544                         return EXIT_STDOUT;
545
546                 *keep_stdout = *keep_stdin = false;
547         }
548
549         return 0;
550 }
551
552 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
553         bool keep_groups = false;
554         int r;
555
556         assert(context);
557
558         /* Lookup and set GID and supplementary group list. Here too
559          * we avoid NSS lookups for gid=0. */
560
561         if (context->group || username) {
562
563                 if (context->group) {
564                         const char *g = context->group;
565
566                         if ((r = get_group_creds(&g, &gid)) < 0)
567                                 return r;
568                 }
569
570                 /* First step, initialize groups from /etc/groups */
571                 if (username && gid != 0) {
572                         if (initgroups(username, gid) < 0)
573                                 return -errno;
574
575                         keep_groups = true;
576                 }
577
578                 /* Second step, set our gids */
579                 if (setresgid(gid, gid, gid) < 0)
580                         return -errno;
581         }
582
583         if (context->supplementary_groups) {
584                 int ngroups_max, k;
585                 gid_t *gids;
586                 char **i;
587
588                 /* Final step, initialize any manually set supplementary groups */
589                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
590
591                 if (!(gids = new(gid_t, ngroups_max)))
592                         return -ENOMEM;
593
594                 if (keep_groups) {
595                         if ((k = getgroups(ngroups_max, gids)) < 0) {
596                                 free(gids);
597                                 return -errno;
598                         }
599                 } else
600                         k = 0;
601
602                 STRV_FOREACH(i, context->supplementary_groups) {
603                         const char *g;
604
605                         if (k >= ngroups_max) {
606                                 free(gids);
607                                 return -E2BIG;
608                         }
609
610                         g = *i;
611                         r = get_group_creds(&g, gids+k);
612                         if (r < 0) {
613                                 free(gids);
614                                 return r;
615                         }
616
617                         k++;
618                 }
619
620                 if (setgroups(k, gids) < 0) {
621                         free(gids);
622                         return -errno;
623                 }
624
625                 free(gids);
626         }
627
628         return 0;
629 }
630
631 static int enforce_user(const ExecContext *context, uid_t uid) {
632         int r;
633         assert(context);
634
635         /* Sets (but doesn't lookup) the uid and make sure we keep the
636          * capabilities while doing so. */
637
638         if (context->capabilities) {
639                 cap_t d;
640                 static const cap_value_t bits[] = {
641                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
642                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
643                 };
644
645                 /* First step: If we need to keep capabilities but
646                  * drop privileges we need to make sure we keep our
647                  * caps, whiel we drop privileges. */
648                 if (uid != 0) {
649                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
650
651                         if (prctl(PR_GET_SECUREBITS) != sb)
652                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
653                                         return -errno;
654                 }
655
656                 /* Second step: set the capabilities. This will reduce
657                  * the capabilities to the minimum we need. */
658
659                 if (!(d = cap_dup(context->capabilities)))
660                         return -errno;
661
662                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
663                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
664                         r = -errno;
665                         cap_free(d);
666                         return r;
667                 }
668
669                 if (cap_set_proc(d) < 0) {
670                         r = -errno;
671                         cap_free(d);
672                         return r;
673                 }
674
675                 cap_free(d);
676         }
677
678         /* Third step: actually set the uids */
679         if (setresuid(uid, uid, uid) < 0)
680                 return -errno;
681
682         /* At this point we should have all necessary capabilities but
683            are otherwise a normal user. However, the caps might got
684            corrupted due to the setresuid() so we need clean them up
685            later. This is done outside of this call. */
686
687         return 0;
688 }
689
690 #ifdef HAVE_PAM
691
692 static int null_conv(
693                 int num_msg,
694                 const struct pam_message **msg,
695                 struct pam_response **resp,
696                 void *appdata_ptr) {
697
698         /* We don't support conversations */
699
700         return PAM_CONV_ERR;
701 }
702
703 static int setup_pam(
704                 const char *name,
705                 const char *user,
706                 const char *tty,
707                 char ***pam_env,
708                 int fds[], unsigned n_fds) {
709
710         static const struct pam_conv conv = {
711                 .conv = null_conv,
712                 .appdata_ptr = NULL
713         };
714
715         pam_handle_t *handle = NULL;
716         sigset_t ss, old_ss;
717         int pam_code = PAM_SUCCESS;
718         int err;
719         char **e = NULL;
720         bool close_session = false;
721         pid_t pam_pid = 0, parent_pid;
722
723         assert(name);
724         assert(user);
725         assert(pam_env);
726
727         /* We set up PAM in the parent process, then fork. The child
728          * will then stay around until killed via PR_GET_PDEATHSIG or
729          * systemd via the cgroup logic. It will then remove the PAM
730          * session again. The parent process will exec() the actual
731          * daemon. We do things this way to ensure that the main PID
732          * of the daemon is the one we initially fork()ed. */
733
734         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
735                 handle = NULL;
736                 goto fail;
737         }
738
739         if (tty)
740                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
741                         goto fail;
742
743         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
744                 goto fail;
745
746         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
747                 goto fail;
748
749         close_session = true;
750
751         if ((!(e = pam_getenvlist(handle)))) {
752                 pam_code = PAM_BUF_ERR;
753                 goto fail;
754         }
755
756         /* Block SIGTERM, so that we know that it won't get lost in
757          * the child */
758         if (sigemptyset(&ss) < 0 ||
759             sigaddset(&ss, SIGTERM) < 0 ||
760             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
761                 goto fail;
762
763         parent_pid = getpid();
764
765         if ((pam_pid = fork()) < 0)
766                 goto fail;
767
768         if (pam_pid == 0) {
769                 int sig;
770                 int r = EXIT_PAM;
771
772                 /* The child's job is to reset the PAM session on
773                  * termination */
774
775                 /* This string must fit in 10 chars (i.e. the length
776                  * of "/sbin/init"), to look pretty in /bin/ps */
777                 rename_process("(sd-pam)");
778
779                 /* Make sure we don't keep open the passed fds in this
780                 child. We assume that otherwise only those fds are
781                 open here that have been opened by PAM. */
782                 close_many(fds, n_fds);
783
784                 /* Wait until our parent died. This will most likely
785                  * not work since the kernel does not allow
786                  * unprivileged parents kill their privileged children
787                  * this way. We rely on the control groups kill logic
788                  * to do the rest for us. */
789                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
790                         goto child_finish;
791
792                 /* Check if our parent process might already have
793                  * died? */
794                 if (getppid() == parent_pid) {
795                         for (;;) {
796                                 if (sigwait(&ss, &sig) < 0) {
797                                         if (errno == EINTR)
798                                                 continue;
799
800                                         goto child_finish;
801                                 }
802
803                                 assert(sig == SIGTERM);
804                                 break;
805                         }
806                 }
807
808                 /* If our parent died we'll end the session */
809                 if (getppid() != parent_pid)
810                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
811                                 goto child_finish;
812
813                 r = 0;
814
815         child_finish:
816                 pam_end(handle, pam_code | PAM_DATA_SILENT);
817                 _exit(r);
818         }
819
820         /* If the child was forked off successfully it will do all the
821          * cleanups, so forget about the handle here. */
822         handle = NULL;
823
824         /* Unblock SIGTERM again in the parent */
825         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
826                 goto fail;
827
828         /* We close the log explicitly here, since the PAM modules
829          * might have opened it, but we don't want this fd around. */
830         closelog();
831
832         *pam_env = e;
833         e = NULL;
834
835         return 0;
836
837 fail:
838         if (pam_code != PAM_SUCCESS)
839                 err = -EPERM;  /* PAM errors do not map to errno */
840         else
841                 err = -errno;
842
843         if (handle) {
844                 if (close_session)
845                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
846
847                 pam_end(handle, pam_code | PAM_DATA_SILENT);
848         }
849
850         strv_free(e);
851
852         closelog();
853
854         if (pam_pid > 1) {
855                 kill(pam_pid, SIGTERM);
856                 kill(pam_pid, SIGCONT);
857         }
858
859         return err;
860 }
861 #endif
862
863 static int do_capability_bounding_set_drop(uint64_t drop) {
864         unsigned long i;
865         cap_t old_cap = NULL, new_cap = NULL;
866         cap_flag_value_t fv;
867         int r;
868
869         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
870          * in the effective set (yes, the kernel drops that when
871          * executing init!), so get it back temporarily so that we can
872          * call PR_CAPBSET_DROP. */
873
874         old_cap = cap_get_proc();
875         if (!old_cap)
876                 return -errno;
877
878         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
879                 r = -errno;
880                 goto finish;
881         }
882
883         if (fv != CAP_SET) {
884                 static const cap_value_t v = CAP_SETPCAP;
885
886                 new_cap = cap_dup(old_cap);
887                 if (!new_cap) {
888                         r = -errno;
889                         goto finish;
890                 }
891
892                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
893                         r = -errno;
894                         goto finish;
895                 }
896
897                 if (cap_set_proc(new_cap) < 0) {
898                         r = -errno;
899                         goto finish;
900                 }
901         }
902
903         for (i = 0; i <= cap_last_cap(); i++)
904                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
905                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
906                                 r = -errno;
907                                 goto finish;
908                         }
909                 }
910
911         r = 0;
912
913 finish:
914         if (new_cap)
915                 cap_free(new_cap);
916
917         if (old_cap) {
918                 cap_set_proc(old_cap);
919                 cap_free(old_cap);
920         }
921
922         return r;
923 }
924
/* Renames the process to the file name component of 'path', wrapped in
 * parentheses. At most the last 8 characters of the file name are
 * used; an empty file name results in "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        char *out;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the beginning is usually the less
                 * interesting part, it might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        out = process_name;
        *out++ = '(';
        memcpy(out, base, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
955
956 int exec_spawn(ExecCommand *command,
957                char **argv,
958                const ExecContext *context,
959                int fds[], unsigned n_fds,
960                char **environment,
961                bool apply_permissions,
962                bool apply_chroot,
963                bool apply_tty_stdin,
964                bool confirm_spawn,
965                CGroupBonding *cgroup_bondings,
966                CGroupAttribute *cgroup_attributes,
967                const char *cgroup_suffix,
968                int idle_pipe[2],
969                pid_t *ret) {
970
971         pid_t pid;
972         int r;
973         char *line;
974         int socket_fd;
975         char **files_env = NULL;
976
977         assert(command);
978         assert(context);
979         assert(ret);
980         assert(fds || n_fds <= 0);
981
982         if (context->std_input == EXEC_INPUT_SOCKET ||
983             context->std_output == EXEC_OUTPUT_SOCKET ||
984             context->std_error == EXEC_OUTPUT_SOCKET) {
985
986                 if (n_fds != 1)
987                         return -EINVAL;
988
989                 socket_fd = fds[0];
990
991                 fds = NULL;
992                 n_fds = 0;
993         } else
994                 socket_fd = -1;
995
996         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
997                 log_error("Failed to load environment files: %s", strerror(-r));
998                 return r;
999         }
1000
1001         if (!argv)
1002                 argv = command->argv;
1003
1004         if (!(line = exec_command_line(argv))) {
1005                 r = -ENOMEM;
1006                 goto fail_parent;
1007         }
1008
1009         log_debug("About to execute: %s", line);
1010         free(line);
1011
1012         r = cgroup_bonding_realize_list(cgroup_bondings);
1013         if (r < 0)
1014                 goto fail_parent;
1015
1016         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1017
1018         if ((pid = fork()) < 0) {
1019                 r = -errno;
1020                 goto fail_parent;
1021         }
1022
1023         if (pid == 0) {
1024                 int i, err;
1025                 sigset_t ss;
1026                 const char *username = NULL, *home = NULL;
1027                 uid_t uid = (uid_t) -1;
1028                 gid_t gid = (gid_t) -1;
1029                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1030                 unsigned n_env = 0;
1031                 int saved_stdout = -1, saved_stdin = -1;
1032                 bool keep_stdout = false, keep_stdin = false, set_access = false;
1033
1034                 /* child */
1035
1036                 rename_process_from_path(command->path);
1037
1038                 /* We reset exactly these signals, since they are the
1039                  * only ones we set to SIG_IGN in the main daemon. All
1040                  * others we leave untouched because we set them to
1041                  * SIG_DFL or a valid handler initially, both of which
1042                  * will be demoted to SIG_DFL. */
1043                 default_signals(SIGNALS_CRASH_HANDLER,
1044                                 SIGNALS_IGNORE, -1);
1045
1046                 if (context->ignore_sigpipe)
1047                         ignore_signals(SIGPIPE, -1);
1048
1049                 assert_se(sigemptyset(&ss) == 0);
1050                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1051                         err = -errno;
1052                         r = EXIT_SIGNAL_MASK;
1053                         goto fail_child;
1054                 }
1055
1056                 if (idle_pipe) {
1057                         if (idle_pipe[1] >= 0)
1058                                 close_nointr_nofail(idle_pipe[1]);
1059                         if (idle_pipe[0] >= 0) {
1060                                 fd_wait_for_event(idle_pipe[0], POLLHUP, DEFAULT_TIMEOUT_USEC);
1061                                 close_nointr_nofail(idle_pipe[0]);
1062                         }
1063                 }
1064
1065                 /* Close sockets very early to make sure we don't
1066                  * block init reexecution because it cannot bind its
1067                  * sockets */
1068                 log_forget_fds();
1069                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1070                                            socket_fd >= 0 ? 1 : n_fds);
1071                 if (err < 0) {
1072                         r = EXIT_FDS;
1073                         goto fail_child;
1074                 }
1075
1076                 if (!context->same_pgrp)
1077                         if (setsid() < 0) {
1078                                 err = -errno;
1079                                 r = EXIT_SETSID;
1080                                 goto fail_child;
1081                         }
1082
1083                 if (context->tcpwrap_name) {
1084                         if (socket_fd >= 0)
1085                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1086                                         err = -EACCES;
1087                                         r = EXIT_TCPWRAP;
1088                                         goto fail_child;
1089                                 }
1090
1091                         for (i = 0; i < (int) n_fds; i++) {
1092                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1093                                         err = -EACCES;
1094                                         r = EXIT_TCPWRAP;
1095                                         goto fail_child;
1096                                 }
1097                         }
1098                 }
1099
1100                 exec_context_tty_reset(context);
1101
1102                 /* We skip the confirmation step if we shall not apply the TTY */
1103                 if (confirm_spawn &&
1104                     (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1105                         char response;
1106
1107                         /* Set up terminal for the question */
1108                         if ((r = setup_confirm_stdio(context,
1109                                                      &saved_stdin, &saved_stdout))) {
1110                                 err = -errno;
1111                                 goto fail_child;
1112                         }
1113
1114                         /* Now ask the question. */
1115                         if (!(line = exec_command_line(argv))) {
1116                                 err = -ENOMEM;
1117                                 r = EXIT_MEMORY;
1118                                 goto fail_child;
1119                         }
1120
1121                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1122                         free(line);
1123
1124                         if (r < 0 || response == 'n') {
1125                                 err = -ECANCELED;
1126                                 r = EXIT_CONFIRM;
1127                                 goto fail_child;
1128                         } else if (response == 's') {
1129                                 err = r = 0;
1130                                 goto fail_child;
1131                         }
1132
1133                         /* Release terminal for the question */
1134                         if ((r = restore_confirm_stdio(context,
1135                                                        &saved_stdin, &saved_stdout,
1136                                                        &keep_stdin, &keep_stdout))) {
1137                                 err = -errno;
1138                                 goto fail_child;
1139                         }
1140                 }
1141
1142                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1143                  * must sure to drop O_NONBLOCK */
1144                 if (socket_fd >= 0)
1145                         fd_nonblock(socket_fd, false);
1146
1147                 if (!keep_stdin) {
1148                         err = setup_input(context, socket_fd, apply_tty_stdin);
1149                         if (err < 0) {
1150                                 r = EXIT_STDIN;
1151                                 goto fail_child;
1152                         }
1153                 }
1154
1155                 if (!keep_stdout) {
1156                         err = setup_output(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1157                         if (err < 0) {
1158                                 r = EXIT_STDOUT;
1159                                 goto fail_child;
1160                         }
1161                 }
1162
1163                 err = setup_error(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1164                 if (err < 0) {
1165                         r = EXIT_STDERR;
1166                         goto fail_child;
1167                 }
1168
1169                 if (cgroup_bondings) {
1170                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1171                         if (err < 0) {
1172                                 r = EXIT_CGROUP;
1173                                 goto fail_child;
1174                         }
1175                 }
1176
1177                 if (context->oom_score_adjust_set) {
1178                         char t[16];
1179
1180                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1181                         char_array_0(t);
1182
1183                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1184                                 /* Compatibility with Linux <= 2.6.35 */
1185
1186                                 int adj;
1187
1188                                 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1189                                 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1190
1191                                 snprintf(t, sizeof(t), "%i", adj);
1192                                 char_array_0(t);
1193
1194                                 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1195                                     && errno != EACCES) {
1196                                         err = -errno;
1197                                         r = EXIT_OOM_ADJUST;
1198                                         goto fail_child;
1199                                 }
1200                         }
1201                 }
1202
1203                 if (context->nice_set)
1204                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1205                                 err = -errno;
1206                                 r = EXIT_NICE;
1207                                 goto fail_child;
1208                         }
1209
1210                 if (context->cpu_sched_set) {
1211                         struct sched_param param;
1212
1213                         zero(param);
1214                         param.sched_priority = context->cpu_sched_priority;
1215
1216                         if (sched_setscheduler(0, context->cpu_sched_policy |
1217                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1218                                 err = -errno;
1219                                 r = EXIT_SETSCHEDULER;
1220                                 goto fail_child;
1221                         }
1222                 }
1223
1224                 if (context->cpuset)
1225                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1226                                 err = -errno;
1227                                 r = EXIT_CPUAFFINITY;
1228                                 goto fail_child;
1229                         }
1230
1231                 if (context->ioprio_set)
1232                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1233                                 err = -errno;
1234                                 r = EXIT_IOPRIO;
1235                                 goto fail_child;
1236                         }
1237
1238                 if (context->timer_slack_nsec_set)
1239                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1240                                 err = -errno;
1241                                 r = EXIT_TIMERSLACK;
1242                                 goto fail_child;
1243                         }
1244
1245                 if (context->utmp_id)
1246                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1247
1248                 if (context->user) {
1249                         username = context->user;
1250                         err = get_user_creds(&username, &uid, &gid, &home);
1251                         if (err < 0) {
1252                                 r = EXIT_USER;
1253                                 goto fail_child;
1254                         }
1255
1256                         if (is_terminal_input(context->std_input)) {
1257                                 err = chown_terminal(STDIN_FILENO, uid);
1258                                 if (err < 0) {
1259                                         r = EXIT_STDIN;
1260                                         goto fail_child;
1261                                 }
1262                         }
1263
1264                         if (cgroup_bondings && context->control_group_modify) {
1265                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1266                                 if (err >= 0)
1267                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1268                                 if (err < 0) {
1269                                         r = EXIT_CGROUP;
1270                                         goto fail_child;
1271                                 }
1272
1273                                 set_access = true;
1274                         }
1275                 }
1276
1277                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1278                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1279                         if (err < 0) {
1280                                 r = EXIT_CGROUP;
1281                                 goto fail_child;
1282                         }
1283                 }
1284
1285                 if (apply_permissions) {
1286                         err = enforce_groups(context, username, gid);
1287                         if (err < 0) {
1288                                 r = EXIT_GROUP;
1289                                 goto fail_child;
1290                         }
1291                 }
1292
1293                 umask(context->umask);
1294
1295 #ifdef HAVE_PAM
1296                 if (context->pam_name && username) {
1297                         err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1298                         if (err < 0) {
1299                                 r = EXIT_PAM;
1300                                 goto fail_child;
1301                         }
1302                 }
1303 #endif
1304                 if (context->private_network) {
1305                         if (unshare(CLONE_NEWNET) < 0) {
1306                                 err = -errno;
1307                                 r = EXIT_NETWORK;
1308                                 goto fail_child;
1309                         }
1310
1311                         loopback_setup();
1312                 }
1313
1314                 if (strv_length(context->read_write_dirs) > 0 ||
1315                     strv_length(context->read_only_dirs) > 0 ||
1316                     strv_length(context->inaccessible_dirs) > 0 ||
1317                     context->mount_flags != MS_SHARED ||
1318                     context->private_tmp) {
1319                         err = setup_namespace(context->read_write_dirs,
1320                                               context->read_only_dirs,
1321                                               context->inaccessible_dirs,
1322                                               context->private_tmp,
1323                                               context->mount_flags);
1324                         if (err < 0) {
1325                                 r = EXIT_NAMESPACE;
1326                                 goto fail_child;
1327                         }
1328                 }
1329
1330                 if (apply_chroot) {
1331                         if (context->root_directory)
1332                                 if (chroot(context->root_directory) < 0) {
1333                                         err = -errno;
1334                                         r = EXIT_CHROOT;
1335                                         goto fail_child;
1336                                 }
1337
1338                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1339                                 err = -errno;
1340                                 r = EXIT_CHDIR;
1341                                 goto fail_child;
1342                         }
1343                 } else {
1344
1345                         char *d;
1346
1347                         if (asprintf(&d, "%s/%s",
1348                                      context->root_directory ? context->root_directory : "",
1349                                      context->working_directory ? context->working_directory : "") < 0) {
1350                                 err = -ENOMEM;
1351                                 r = EXIT_MEMORY;
1352                                 goto fail_child;
1353                         }
1354
1355                         if (chdir(d) < 0) {
1356                                 err = -errno;
1357                                 free(d);
1358                                 r = EXIT_CHDIR;
1359                                 goto fail_child;
1360                         }
1361
1362                         free(d);
1363                 }
1364
1365                 /* We repeat the fd closing here, to make sure that
1366                  * nothing is leaked from the PAM modules */
1367                 err = close_all_fds(fds, n_fds);
1368                 if (err >= 0)
1369                         err = shift_fds(fds, n_fds);
1370                 if (err >= 0)
1371                         err = flags_fds(fds, n_fds, context->non_blocking);
1372                 if (err < 0) {
1373                         r = EXIT_FDS;
1374                         goto fail_child;
1375                 }
1376
1377                 if (apply_permissions) {
1378
1379                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1380                                 if (!context->rlimit[i])
1381                                         continue;
1382
1383                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1384                                         err = -errno;
1385                                         r = EXIT_LIMITS;
1386                                         goto fail_child;
1387                                 }
1388                         }
1389
1390                         if (context->capability_bounding_set_drop) {
1391                                 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1392                                 if (err < 0) {
1393                                         r = EXIT_CAPABILITIES;
1394                                         goto fail_child;
1395                                 }
1396                         }
1397
1398                         if (context->user) {
1399                                 err = enforce_user(context, uid);
1400                                 if (err < 0) {
1401                                         r = EXIT_USER;
1402                                         goto fail_child;
1403                                 }
1404                         }
1405
1406                         /* PR_GET_SECUREBITS is not privileged, while
1407                          * PR_SET_SECUREBITS is. So to suppress
1408                          * potential EPERMs we'll try not to call
1409                          * PR_SET_SECUREBITS unless necessary. */
1410                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1411                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1412                                         err = -errno;
1413                                         r = EXIT_SECUREBITS;
1414                                         goto fail_child;
1415                                 }
1416
1417                         if (context->capabilities)
1418                                 if (cap_set_proc(context->capabilities) < 0) {
1419                                         err = -errno;
1420                                         r = EXIT_CAPABILITIES;
1421                                         goto fail_child;
1422                                 }
1423                 }
1424
1425                 if (!(our_env = new0(char*, 7))) {
1426                         err = -ENOMEM;
1427                         r = EXIT_MEMORY;
1428                         goto fail_child;
1429                 }
1430
1431                 if (n_fds > 0)
1432                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1433                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1434                                 err = -ENOMEM;
1435                                 r = EXIT_MEMORY;
1436                                 goto fail_child;
1437                         }
1438
1439                 if (home)
1440                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1441                                 err = -ENOMEM;
1442                                 r = EXIT_MEMORY;
1443                                 goto fail_child;
1444                         }
1445
1446                 if (username)
1447                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1448                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1449                                 err = -ENOMEM;
1450                                 r = EXIT_MEMORY;
1451                                 goto fail_child;
1452                         }
1453
1454                 if (is_terminal_input(context->std_input) ||
1455                     context->std_output == EXEC_OUTPUT_TTY ||
1456                     context->std_error == EXEC_OUTPUT_TTY)
1457                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1458                                 err = -ENOMEM;
1459                                 r = EXIT_MEMORY;
1460                                 goto fail_child;
1461                         }
1462
1463                 assert(n_env <= 7);
1464
1465                 if (!(final_env = strv_env_merge(
1466                                       5,
1467                                       environment,
1468                                       our_env,
1469                                       context->environment,
1470                                       files_env,
1471                                       pam_env,
1472                                       NULL))) {
1473                         err = -ENOMEM;
1474                         r = EXIT_MEMORY;
1475                         goto fail_child;
1476                 }
1477
1478                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1479                         err = -ENOMEM;
1480                         r = EXIT_MEMORY;
1481                         goto fail_child;
1482                 }
1483
1484                 final_env = strv_env_clean(final_env);
1485
1486                 execve(command->path, final_argv, final_env);
1487                 err = -errno;
1488                 r = EXIT_EXEC;
1489
1490         fail_child:
1491                 if (r != 0) {
1492                         log_open();
1493                         log_warning("Failed at step %s spawning %s: %s",
1494                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1495                                     command->path, strerror(-err));
1496                 }
1497
1498                 strv_free(our_env);
1499                 strv_free(final_env);
1500                 strv_free(pam_env);
1501                 strv_free(files_env);
1502                 strv_free(final_argv);
1503
1504                 if (saved_stdin >= 0)
1505                         close_nointr_nofail(saved_stdin);
1506
1507                 if (saved_stdout >= 0)
1508                         close_nointr_nofail(saved_stdout);
1509
1510                 _exit(r);
1511         }
1512
1513         strv_free(files_env);
1514
1515         /* We add the new process to the cgroup both in the child (so
1516          * that we can be sure that no user code is ever executed
1517          * outside of the cgroup) and in the parent (so that we can be
1518          * sure that when we kill the cgroup the process will be
1519          * killed too). */
1520         if (cgroup_bondings)
1521                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1522
1523         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1524
1525         exec_status_start(&command->exec_status, pid);
1526
1527         *ret = pid;
1528         return 0;
1529
1530 fail_parent:
1531         strv_free(files_env);
1532
1533         return r;
1534 }
1535
1536 void exec_context_init(ExecContext *c) {
1537         assert(c);
1538
1539         c->umask = 0022;
1540         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1541         c->cpu_sched_policy = SCHED_OTHER;
1542         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1543         c->syslog_level_prefix = true;
1544         c->mount_flags = MS_SHARED;
1545         c->kill_signal = SIGTERM;
1546         c->send_sigkill = true;
1547         c->control_group_persistent = -1;
1548         c->ignore_sigpipe = true;
1549 }
1550
1551 void exec_context_done(ExecContext *c) {
1552         unsigned l;
1553
1554         assert(c);
1555
1556         strv_free(c->environment);
1557         c->environment = NULL;
1558
1559         strv_free(c->environment_files);
1560         c->environment_files = NULL;
1561
1562         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1563                 free(c->rlimit[l]);
1564                 c->rlimit[l] = NULL;
1565         }
1566
1567         free(c->working_directory);
1568         c->working_directory = NULL;
1569         free(c->root_directory);
1570         c->root_directory = NULL;
1571
1572         free(c->tty_path);
1573         c->tty_path = NULL;
1574
1575         free(c->tcpwrap_name);
1576         c->tcpwrap_name = NULL;
1577
1578         free(c->syslog_identifier);
1579         c->syslog_identifier = NULL;
1580
1581         free(c->user);
1582         c->user = NULL;
1583
1584         free(c->group);
1585         c->group = NULL;
1586
1587         strv_free(c->supplementary_groups);
1588         c->supplementary_groups = NULL;
1589
1590         free(c->pam_name);
1591         c->pam_name = NULL;
1592
1593         if (c->capabilities) {
1594                 cap_free(c->capabilities);
1595                 c->capabilities = NULL;
1596         }
1597
1598         strv_free(c->read_only_dirs);
1599         c->read_only_dirs = NULL;
1600
1601         strv_free(c->read_write_dirs);
1602         c->read_write_dirs = NULL;
1603
1604         strv_free(c->inaccessible_dirs);
1605         c->inaccessible_dirs = NULL;
1606
1607         if (c->cpuset)
1608                 CPU_FREE(c->cpuset);
1609
1610         free(c->utmp_id);
1611         c->utmp_id = NULL;
1612 }
1613
1614 void exec_command_done(ExecCommand *c) {
1615         assert(c);
1616
1617         free(c->path);
1618         c->path = NULL;
1619
1620         strv_free(c->argv);
1621         c->argv = NULL;
1622 }
1623
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1625         unsigned i;
1626
1627         for (i = 0; i < n; i++)
1628                 exec_command_done(c+i);
1629 }
1630
1631 void exec_command_free_list(ExecCommand *c) {
1632         ExecCommand *i;
1633
1634         while ((i = c)) {
1635                 LIST_REMOVE(ExecCommand, command, c, i);
1636                 exec_command_done(i);
1637                 free(i);
1638         }
1639 }
1640
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1642         unsigned i;
1643
1644         for (i = 0; i < n; i++) {
1645                 exec_command_free_list(c[i]);
1646                 c[i] = NULL;
1647         }
1648 }
1649
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651         char **i, **r = NULL;
1652
1653         assert(c);
1654         assert(l);
1655
1656         STRV_FOREACH(i, c->environment_files) {
1657                 char *fn;
1658                 int k;
1659                 bool ignore = false;
1660                 char **p;
1661
1662                 fn = *i;
1663
1664                 if (fn[0] == '-') {
1665                         ignore = true;
1666                         fn ++;
1667                 }
1668
1669                 if (!path_is_absolute(fn)) {
1670
1671                         if (ignore)
1672                                 continue;
1673
1674                         strv_free(r);
1675                         return -EINVAL;
1676                 }
1677
1678                 if ((k = load_env_file(fn, &p)) < 0) {
1679
1680                         if (ignore)
1681                                 continue;
1682
1683                         strv_free(r);
1684                         return k;
1685                 }
1686
1687                 if (r == NULL)
1688                         r = p;
1689                 else {
1690                         char **m;
1691
1692                         m = strv_env_merge(2, r, p);
1693                         strv_free(r);
1694                         strv_free(p);
1695
1696                         if (!m)
1697                                 return -ENOMEM;
1698
1699                         r = m;
1700                 }
1701         }
1702
1703         *l = r;
1704
1705         return 0;
1706 }
1707
1708 static void strv_fprintf(FILE *f, char **l) {
1709         char **g;
1710
1711         assert(f);
1712
1713         STRV_FOREACH(g, l)
1714                 fprintf(f, " %s", *g);
1715 }
1716
1717 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1718         char ** e;
1719         unsigned i;
1720
1721         assert(c);
1722         assert(f);
1723
1724         if (!prefix)
1725                 prefix = "";
1726
1727         fprintf(f,
1728                 "%sUMask: %04o\n"
1729                 "%sWorkingDirectory: %s\n"
1730                 "%sRootDirectory: %s\n"
1731                 "%sNonBlocking: %s\n"
1732                 "%sPrivateTmp: %s\n"
1733                 "%sControlGroupModify: %s\n"
1734                 "%sControlGroupPersistent: %s\n"
1735                 "%sPrivateNetwork: %s\n",
1736                 prefix, c->umask,
1737                 prefix, c->working_directory ? c->working_directory : "/",
1738                 prefix, c->root_directory ? c->root_directory : "/",
1739                 prefix, yes_no(c->non_blocking),
1740                 prefix, yes_no(c->private_tmp),
1741                 prefix, yes_no(c->control_group_modify),
1742                 prefix, yes_no(c->control_group_persistent),
1743                 prefix, yes_no(c->private_network));
1744
1745         STRV_FOREACH(e, c->environment)
1746                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1747
1748         STRV_FOREACH(e, c->environment_files)
1749                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1750
1751         if (c->tcpwrap_name)
1752                 fprintf(f,
1753                         "%sTCPWrapName: %s\n",
1754                         prefix, c->tcpwrap_name);
1755
1756         if (c->nice_set)
1757                 fprintf(f,
1758                         "%sNice: %i\n",
1759                         prefix, c->nice);
1760
1761         if (c->oom_score_adjust_set)
1762                 fprintf(f,
1763                         "%sOOMScoreAdjust: %i\n",
1764                         prefix, c->oom_score_adjust);
1765
1766         for (i = 0; i < RLIM_NLIMITS; i++)
1767                 if (c->rlimit[i])
1768                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1769
1770         if (c->ioprio_set)
1771                 fprintf(f,
1772                         "%sIOSchedulingClass: %s\n"
1773                         "%sIOPriority: %i\n",
1774                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1775                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1776
1777         if (c->cpu_sched_set)
1778                 fprintf(f,
1779                         "%sCPUSchedulingPolicy: %s\n"
1780                         "%sCPUSchedulingPriority: %i\n"
1781                         "%sCPUSchedulingResetOnFork: %s\n",
1782                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1783                         prefix, c->cpu_sched_priority,
1784                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1785
1786         if (c->cpuset) {
1787                 fprintf(f, "%sCPUAffinity:", prefix);
1788                 for (i = 0; i < c->cpuset_ncpus; i++)
1789                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1790                                 fprintf(f, " %i", i);
1791                 fputs("\n", f);
1792         }
1793
1794         if (c->timer_slack_nsec_set)
1795                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1796
1797         fprintf(f,
1798                 "%sStandardInput: %s\n"
1799                 "%sStandardOutput: %s\n"
1800                 "%sStandardError: %s\n",
1801                 prefix, exec_input_to_string(c->std_input),
1802                 prefix, exec_output_to_string(c->std_output),
1803                 prefix, exec_output_to_string(c->std_error));
1804
1805         if (c->tty_path)
1806                 fprintf(f,
1807                         "%sTTYPath: %s\n"
1808                         "%sTTYReset: %s\n"
1809                         "%sTTYVHangup: %s\n"
1810                         "%sTTYVTDisallocate: %s\n",
1811                         prefix, c->tty_path,
1812                         prefix, yes_no(c->tty_reset),
1813                         prefix, yes_no(c->tty_vhangup),
1814                         prefix, yes_no(c->tty_vt_disallocate));
1815
1816         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1817             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1818             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1819             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1820                 fprintf(f,
1821                         "%sSyslogFacility: %s\n"
1822                         "%sSyslogLevel: %s\n",
1823                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1824                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1825
1826         if (c->capabilities) {
1827                 char *t;
1828                 if ((t = cap_to_text(c->capabilities, NULL))) {
1829                         fprintf(f, "%sCapabilities: %s\n",
1830                                 prefix, t);
1831                         cap_free(t);
1832                 }
1833         }
1834
1835         if (c->secure_bits)
1836                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1837                         prefix,
1838                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1839                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1840                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1841                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1842                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1843                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1844
1845         if (c->capability_bounding_set_drop) {
1846                 unsigned long l;
1847                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1848
1849                 for (l = 0; l <= cap_last_cap(); l++)
1850                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1851                                 char *t;
1852
1853                                 if ((t = cap_to_name(l))) {
1854                                         fprintf(f, " %s", t);
1855                                         cap_free(t);
1856                                 }
1857                         }
1858
1859                 fputs("\n", f);
1860         }
1861
1862         if (c->user)
1863                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1864         if (c->group)
1865                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1866
1867         if (strv_length(c->supplementary_groups) > 0) {
1868                 fprintf(f, "%sSupplementaryGroups:", prefix);
1869                 strv_fprintf(f, c->supplementary_groups);
1870                 fputs("\n", f);
1871         }
1872
1873         if (c->pam_name)
1874                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1875
1876         if (strv_length(c->read_write_dirs) > 0) {
1877                 fprintf(f, "%sReadWriteDirs:", prefix);
1878                 strv_fprintf(f, c->read_write_dirs);
1879                 fputs("\n", f);
1880         }
1881
1882         if (strv_length(c->read_only_dirs) > 0) {
1883                 fprintf(f, "%sReadOnlyDirs:", prefix);
1884                 strv_fprintf(f, c->read_only_dirs);
1885                 fputs("\n", f);
1886         }
1887
1888         if (strv_length(c->inaccessible_dirs) > 0) {
1889                 fprintf(f, "%sInaccessibleDirs:", prefix);
1890                 strv_fprintf(f, c->inaccessible_dirs);
1891                 fputs("\n", f);
1892         }
1893
1894         fprintf(f,
1895                 "%sKillMode: %s\n"
1896                 "%sKillSignal: SIG%s\n"
1897                 "%sSendSIGKILL: %s\n"
1898                 "%sIgnoreSIGPIPE: %s\n",
1899                 prefix, kill_mode_to_string(c->kill_mode),
1900                 prefix, signal_to_string(c->kill_signal),
1901                 prefix, yes_no(c->send_sigkill),
1902                 prefix, yes_no(c->ignore_sigpipe));
1903
1904         if (c->utmp_id)
1905                 fprintf(f,
1906                         "%sUtmpIdentifier: %s\n",
1907                         prefix, c->utmp_id);
1908 }
1909
1910 void exec_status_start(ExecStatus *s, pid_t pid) {
1911         assert(s);
1912
1913         zero(*s);
1914         s->pid = pid;
1915         dual_timestamp_get(&s->start_timestamp);
1916 }
1917
1918 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1919         assert(s);
1920
1921         if (s->pid && s->pid != pid)
1922                 zero(*s);
1923
1924         s->pid = pid;
1925         dual_timestamp_get(&s->exit_timestamp);
1926
1927         s->code = code;
1928         s->status = status;
1929
1930         if (context) {
1931                 if (context->utmp_id)
1932                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1933
1934                 exec_context_tty_reset(context);
1935         }
1936 }
1937
1938 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1939         char buf[FORMAT_TIMESTAMP_MAX];
1940
1941         assert(s);
1942         assert(f);
1943
1944         if (!prefix)
1945                 prefix = "";
1946
1947         if (s->pid <= 0)
1948                 return;
1949
1950         fprintf(f,
1951                 "%sPID: %lu\n",
1952                 prefix, (unsigned long) s->pid);
1953
1954         if (s->start_timestamp.realtime > 0)
1955                 fprintf(f,
1956                         "%sStart Timestamp: %s\n",
1957                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1958
1959         if (s->exit_timestamp.realtime > 0)
1960                 fprintf(f,
1961                         "%sExit Timestamp: %s\n"
1962                         "%sExit Code: %s\n"
1963                         "%sExit Status: %i\n",
1964                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1965                         prefix, sigchld_code_to_string(s->code),
1966                         prefix, s->status);
1967 }
1968
1969 char *exec_command_line(char **argv) {
1970         size_t k;
1971         char *n, *p, **a;
1972         bool first = true;
1973
1974         assert(argv);
1975
1976         k = 1;
1977         STRV_FOREACH(a, argv)
1978                 k += strlen(*a)+3;
1979
1980         if (!(n = new(char, k)))
1981                 return NULL;
1982
1983         p = n;
1984         STRV_FOREACH(a, argv) {
1985
1986                 if (!first)
1987                         *(p++) = ' ';
1988                 else
1989                         first = false;
1990
1991                 if (strpbrk(*a, WHITESPACE)) {
1992                         *(p++) = '\'';
1993                         p = stpcpy(p, *a);
1994                         *(p++) = '\'';
1995                 } else
1996                         p = stpcpy(p, *a);
1997
1998         }
1999
2000         *p = 0;
2001
2002         /* FIXME: this doesn't really handle arguments that have
2003          * spaces and ticks in them */
2004
2005         return n;
2006 }
2007
2008 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2009         char *p2;
2010         const char *prefix2;
2011
2012         char *cmd;
2013
2014         assert(c);
2015         assert(f);
2016
2017         if (!prefix)
2018                 prefix = "";
2019         p2 = strappend(prefix, "\t");
2020         prefix2 = p2 ? p2 : prefix;
2021
2022         cmd = exec_command_line(c->argv);
2023
2024         fprintf(f,
2025                 "%sCommand Line: %s\n",
2026                 prefix, cmd ? cmd : strerror(ENOMEM));
2027
2028         free(cmd);
2029
2030         exec_status_dump(&c->exec_status, f, prefix2);
2031
2032         free(p2);
2033 }
2034
2035 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2036         assert(f);
2037
2038         if (!prefix)
2039                 prefix = "";
2040
2041         LIST_FOREACH(command, c, c)
2042                 exec_command_dump(c, f, prefix);
2043 }
2044
2045 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2046         ExecCommand *end;
2047
2048         assert(l);
2049         assert(e);
2050
2051         if (*l) {
2052                 /* It's kind of important, that we keep the order here */
2053                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2054                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2055         } else
2056               *l = e;
2057 }
2058
2059 int exec_command_set(ExecCommand *c, const char *path, ...) {
2060         va_list ap;
2061         char **l, *p;
2062
2063         assert(c);
2064         assert(path);
2065
2066         va_start(ap, path);
2067         l = strv_new_ap(path, ap);
2068         va_end(ap);
2069
2070         if (!l)
2071                 return -ENOMEM;
2072
2073         if (!(p = strdup(path))) {
2074                 strv_free(l);
2075                 return -ENOMEM;
2076         }
2077
2078         free(c->path);
2079         c->path = p;
2080
2081         strv_free(c->argv);
2082         c->argv = l;
2083
2084         return 0;
2085 }
2086
2087 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2088         [EXEC_INPUT_NULL] = "null",
2089         [EXEC_INPUT_TTY] = "tty",
2090         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2091         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2092         [EXEC_INPUT_SOCKET] = "socket"
2093 };
2094
2095 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2096
2097 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2098         [EXEC_OUTPUT_INHERIT] = "inherit",
2099         [EXEC_OUTPUT_NULL] = "null",
2100         [EXEC_OUTPUT_TTY] = "tty",
2101         [EXEC_OUTPUT_SYSLOG] = "syslog",
2102         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2103         [EXEC_OUTPUT_KMSG] = "kmsg",
2104         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2105         [EXEC_OUTPUT_JOURNAL] = "journal",
2106         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2107         [EXEC_OUTPUT_SOCKET] = "socket"
2108 };
2109
2110 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2111
2112 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2113         [KILL_CONTROL_GROUP] = "control-group",
2114         [KILL_PROCESS] = "process",
2115         [KILL_NONE] = "none"
2116 };
2117
2118 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2119
2120 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2121         [KILL_MAIN] = "main",
2122         [KILL_CONTROL] = "control",
2123         [KILL_ALL] = "all"
2124 };
2125
2126 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);