chiark / gitweb /
service: introduce Type=idle and use it for gettys
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41
42 #ifdef HAVE_PAM
43 #include <security/pam_appl.h>
44 #endif
45
46 #include "execute.h"
47 #include "strv.h"
48 #include "macro.h"
49 #include "capability.h"
50 #include "util.h"
51 #include "log.h"
52 #include "ioprio.h"
53 #include "securebits.h"
54 #include "cgroup.h"
55 #include "namespace.h"
56 #include "tcpwrap.h"
57 #include "exit-status.h"
58 #include "missing.h"
59 #include "utmp-wtmp.h"
60 #include "def.h"
61 #include "loopback-setup.h"
62
63 /* This assumes there is a 'tty' group */
64 #define TTY_MODE 0620
65
/* Relocates the passed descriptors so that fds[i] ends up as descriptor
 * number i+3. The array is updated in place with the new descriptor
 * numbers. Returns 0 on success, or a negative errno value if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;
        int pending;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                /* Index of the first slot that could not be placed in
                 * this pass, or -1 if every slot is where it belongs */
                pending = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the slot is already correct */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free number at or
                         * above the one requested; if the requested one
                         * was taken, another pass is needed from here */
                        if (dup_fd != wanted && pending < 0)
                                pending = idx;
                }

                first = pending;
        } while (pending >= 0);

        return 0;
}
109
/* Applies the requested O_NONBLOCK state to every passed descriptor and
 * clears FD_CLOEXEC on each of them. Returns 0 on success or the first
 * negative error reported by the fd helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int rc;

                rc = fd_nonblock(fds[k], nonblock);
                if (rc < 0)
                        return rc;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child process */
                rc = fd_cloexec(fds[k], false);
                if (rc < 0)
                        return rc;
        }

        return 0;
}
136
137 static const char *tty_path(const ExecContext *context) {
138         assert(context);
139
140         if (context->tty_path)
141                 return context->tty_path;
142
143         return "/dev/console";
144 }
145
146 void exec_context_tty_reset(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_vhangup)
150                 terminal_vhangup(tty_path(context));
151
152         if (context->tty_reset)
153                 reset_terminal(tty_path(context));
154
155         if (context->tty_vt_disallocate && context->tty_path)
156                 vt_disallocate(context->tty_path);
157 }
158
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already landed on the requested number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(null_fd);

        return result;
}
175
176 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
177         int fd, r;
178         union sockaddr_union sa;
179
180         assert(context);
181         assert(output < _EXEC_OUTPUT_MAX);
182         assert(ident);
183         assert(nfd >= 0);
184
185         fd = socket(AF_UNIX, SOCK_STREAM, 0);
186         if (fd < 0)
187                 return -errno;
188
189         zero(sa);
190         sa.un.sun_family = AF_UNIX;
191         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
192
193         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
194         if (r < 0) {
195                 close_nointr_nofail(fd);
196                 return -errno;
197         }
198
199         if (shutdown(fd, SHUT_RD) < 0) {
200                 close_nointr_nofail(fd);
201                 return -errno;
202         }
203
204         dprintf(fd,
205                 "%s\n"
206                 "%i\n"
207                 "%i\n"
208                 "%i\n"
209                 "%i\n"
210                 "%i\n",
211                 context->syslog_identifier ? context->syslog_identifier : ident,
212                 context->syslog_priority,
213                 !!context->syslog_level_prefix,
214                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
215                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
216                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
217
218         if (fd != nfd) {
219                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
220                 close_nointr_nofail(fd);
221         } else
222                 r = nfd;
223
224         return r;
225 }
/* Opens the terminal at path (with O_NOCTTY added to the passed flags)
 * and makes it available as descriptor nfd. Returns nfd on success, a
 * negative error value otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already landed on the requested number */
        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(term_fd);

        return result;
}
243
244 static bool is_terminal_input(ExecInput i) {
245         return
246                 i == EXEC_INPUT_TTY ||
247                 i == EXEC_INPUT_TTY_FORCE ||
248                 i == EXEC_INPUT_TTY_FAIL;
249 }
250
251 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
252
253         if (is_terminal_input(std_input) && !apply_tty_stdin)
254                 return EXEC_INPUT_NULL;
255
256         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
257                 return EXEC_INPUT_NULL;
258
259         return std_input;
260 }
261
262 static int fixup_output(ExecOutput std_output, int socket_fd) {
263
264         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
265                 return EXEC_OUTPUT_INHERIT;
266
267         return std_output;
268 }
269
270 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
271         ExecInput i;
272
273         assert(context);
274
275         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
276
277         switch (i) {
278
279         case EXEC_INPUT_NULL:
280                 return open_null_as(O_RDONLY, STDIN_FILENO);
281
282         case EXEC_INPUT_TTY:
283         case EXEC_INPUT_TTY_FORCE:
284         case EXEC_INPUT_TTY_FAIL: {
285                 int fd, r;
286
287                 if ((fd = acquire_terminal(
288                                      tty_path(context),
289                                      i == EXEC_INPUT_TTY_FAIL,
290                                      i == EXEC_INPUT_TTY_FORCE,
291                                      false)) < 0)
292                         return fd;
293
294                 if (fd != STDIN_FILENO) {
295                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
296                         close_nointr_nofail(fd);
297                 } else
298                         r = STDIN_FILENO;
299
300                 return r;
301         }
302
303         case EXEC_INPUT_SOCKET:
304                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
305
306         default:
307                 assert_not_reached("Unknown input type");
308         }
309 }
310
311 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
312         ExecOutput o;
313         ExecInput i;
314
315         assert(context);
316         assert(ident);
317
318         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
319         o = fixup_output(context->std_output, socket_fd);
320
321         /* This expects the input is already set up */
322
323         switch (o) {
324
325         case EXEC_OUTPUT_INHERIT:
326
327                 /* If input got downgraded, inherit the original value */
328                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
329                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
330
331                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
332                 if (i != EXEC_INPUT_NULL)
333                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
334
335                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
336                 if (getppid() != 1)
337                         return STDOUT_FILENO;
338
339                 /* We need to open /dev/null here anew, to get the
340                  * right access mode. So we fall through */
341
342         case EXEC_OUTPUT_NULL:
343                 return open_null_as(O_WRONLY, STDOUT_FILENO);
344
345         case EXEC_OUTPUT_TTY:
346                 if (is_terminal_input(i))
347                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
348
349                 /* We don't reset the terminal if this is just about output */
350                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
351
352         case EXEC_OUTPUT_SYSLOG:
353         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
354         case EXEC_OUTPUT_KMSG:
355         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
356         case EXEC_OUTPUT_JOURNAL:
357         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
358                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
359
360         case EXEC_OUTPUT_SOCKET:
361                 assert(socket_fd >= 0);
362                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
363
364         default:
365                 assert_not_reached("Unknown output type");
366         }
367 }
368
369 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
370         ExecOutput o, e;
371         ExecInput i;
372
373         assert(context);
374         assert(ident);
375
376         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
377         o = fixup_output(context->std_output, socket_fd);
378         e = fixup_output(context->std_error, socket_fd);
379
380         /* This expects the input and output are already set up */
381
382         /* Don't change the stderr file descriptor if we inherit all
383          * the way and are not on a tty */
384         if (e == EXEC_OUTPUT_INHERIT &&
385             o == EXEC_OUTPUT_INHERIT &&
386             i == EXEC_INPUT_NULL &&
387             !is_terminal_input(context->std_input) &&
388             getppid () != 1)
389                 return STDERR_FILENO;
390
391         /* Duplicate from stdout if possible */
392         if (e == o || e == EXEC_OUTPUT_INHERIT)
393                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
394
395         switch (e) {
396
397         case EXEC_OUTPUT_NULL:
398                 return open_null_as(O_WRONLY, STDERR_FILENO);
399
400         case EXEC_OUTPUT_TTY:
401                 if (is_terminal_input(i))
402                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
403
404                 /* We don't reset the terminal if this is just about output */
405                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
406
407         case EXEC_OUTPUT_SYSLOG:
408         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
409         case EXEC_OUTPUT_KMSG:
410         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
411         case EXEC_OUTPUT_JOURNAL:
412         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
413                 return connect_logger_as(context, e, ident, STDERR_FILENO);
414
415         case EXEC_OUTPUT_SOCKET:
416                 assert(socket_fd >= 0);
417                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
418
419         default:
420                 assert_not_reached("Unknown error type");
421         }
422 }
423
424 static int chown_terminal(int fd, uid_t uid) {
425         struct stat st;
426
427         assert(fd >= 0);
428
429         /* This might fail. What matters are the results. */
430         (void) fchown(fd, uid, -1);
431         (void) fchmod(fd, TTY_MODE);
432
433         if (fstat(fd, &st) < 0)
434                 return -errno;
435
436         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
437                 return -EPERM;
438
439         return 0;
440 }
441
442 static int setup_confirm_stdio(const ExecContext *context,
443                                int *_saved_stdin,
444                                int *_saved_stdout) {
445         int fd = -1, saved_stdin, saved_stdout = -1, r;
446
447         assert(context);
448         assert(_saved_stdin);
449         assert(_saved_stdout);
450
451         /* This returns positive EXIT_xxx return values instead of
452          * negative errno style values! */
453
454         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
455                 return EXIT_STDIN;
456
457         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
458                 r = EXIT_STDOUT;
459                 goto fail;
460         }
461
462         if ((fd = acquire_terminal(
463                              tty_path(context),
464                              context->std_input == EXEC_INPUT_TTY_FAIL,
465                              context->std_input == EXEC_INPUT_TTY_FORCE,
466                              false)) < 0) {
467                 r = EXIT_STDIN;
468                 goto fail;
469         }
470
471         if (chown_terminal(fd, getuid()) < 0) {
472                 r = EXIT_STDIN;
473                 goto fail;
474         }
475
476         if (dup2(fd, STDIN_FILENO) < 0) {
477                 r = EXIT_STDIN;
478                 goto fail;
479         }
480
481         if (dup2(fd, STDOUT_FILENO) < 0) {
482                 r = EXIT_STDOUT;
483                 goto fail;
484         }
485
486         if (fd >= 2)
487                 close_nointr_nofail(fd);
488
489         *_saved_stdin = saved_stdin;
490         *_saved_stdout = saved_stdout;
491
492         return 0;
493
494 fail:
495         if (saved_stdout >= 0)
496                 close_nointr_nofail(saved_stdout);
497
498         if (saved_stdin >= 0)
499                 close_nointr_nofail(saved_stdin);
500
501         if (fd >= 0)
502                 close_nointr_nofail(fd);
503
504         return r;
505 }
506
507 static int restore_confirm_stdio(const ExecContext *context,
508                                  int *saved_stdin,
509                                  int *saved_stdout,
510                                  bool *keep_stdin,
511                                  bool *keep_stdout) {
512
513         assert(context);
514         assert(saved_stdin);
515         assert(*saved_stdin >= 0);
516         assert(saved_stdout);
517         assert(*saved_stdout >= 0);
518
519         /* This returns positive EXIT_xxx return values instead of
520          * negative errno style values! */
521
522         if (is_terminal_input(context->std_input)) {
523
524                 /* The service wants terminal input. */
525
526                 *keep_stdin = true;
527                 *keep_stdout =
528                         context->std_output == EXEC_OUTPUT_INHERIT ||
529                         context->std_output == EXEC_OUTPUT_TTY;
530
531         } else {
532                 /* If the service doesn't want a controlling terminal,
533                  * then we need to get rid entirely of what we have
534                  * already. */
535
536                 if (release_terminal() < 0)
537                         return EXIT_STDIN;
538
539                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
540                         return EXIT_STDIN;
541
542                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
543                         return EXIT_STDOUT;
544
545                 *keep_stdout = *keep_stdin = false;
546         }
547
548         return 0;
549 }
550
551 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
552         bool keep_groups = false;
553         int r;
554
555         assert(context);
556
557         /* Lookup and set GID and supplementary group list. Here too
558          * we avoid NSS lookups for gid=0. */
559
560         if (context->group || username) {
561
562                 if (context->group) {
563                         const char *g = context->group;
564
565                         if ((r = get_group_creds(&g, &gid)) < 0)
566                                 return r;
567                 }
568
569                 /* First step, initialize groups from /etc/groups */
570                 if (username && gid != 0) {
571                         if (initgroups(username, gid) < 0)
572                                 return -errno;
573
574                         keep_groups = true;
575                 }
576
577                 /* Second step, set our gids */
578                 if (setresgid(gid, gid, gid) < 0)
579                         return -errno;
580         }
581
582         if (context->supplementary_groups) {
583                 int ngroups_max, k;
584                 gid_t *gids;
585                 char **i;
586
587                 /* Final step, initialize any manually set supplementary groups */
588                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
589
590                 if (!(gids = new(gid_t, ngroups_max)))
591                         return -ENOMEM;
592
593                 if (keep_groups) {
594                         if ((k = getgroups(ngroups_max, gids)) < 0) {
595                                 free(gids);
596                                 return -errno;
597                         }
598                 } else
599                         k = 0;
600
601                 STRV_FOREACH(i, context->supplementary_groups) {
602                         const char *g;
603
604                         if (k >= ngroups_max) {
605                                 free(gids);
606                                 return -E2BIG;
607                         }
608
609                         g = *i;
610                         r = get_group_creds(&g, gids+k);
611                         if (r < 0) {
612                                 free(gids);
613                                 return r;
614                         }
615
616                         k++;
617                 }
618
619                 if (setgroups(k, gids) < 0) {
620                         free(gids);
621                         return -errno;
622                 }
623
624                 free(gids);
625         }
626
627         return 0;
628 }
629
630 static int enforce_user(const ExecContext *context, uid_t uid) {
631         int r;
632         assert(context);
633
634         /* Sets (but doesn't lookup) the uid and make sure we keep the
635          * capabilities while doing so. */
636
637         if (context->capabilities) {
638                 cap_t d;
639                 static const cap_value_t bits[] = {
640                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
641                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
642                 };
643
644                 /* First step: If we need to keep capabilities but
645                  * drop privileges we need to make sure we keep our
646                  * caps, whiel we drop privileges. */
647                 if (uid != 0) {
648                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
649
650                         if (prctl(PR_GET_SECUREBITS) != sb)
651                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
652                                         return -errno;
653                 }
654
655                 /* Second step: set the capabilities. This will reduce
656                  * the capabilities to the minimum we need. */
657
658                 if (!(d = cap_dup(context->capabilities)))
659                         return -errno;
660
661                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
662                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
663                         r = -errno;
664                         cap_free(d);
665                         return r;
666                 }
667
668                 if (cap_set_proc(d) < 0) {
669                         r = -errno;
670                         cap_free(d);
671                         return r;
672                 }
673
674                 cap_free(d);
675         }
676
677         /* Third step: actually set the uids */
678         if (setresuid(uid, uid, uid) < 0)
679                 return -errno;
680
681         /* At this point we should have all necessary capabilities but
682            are otherwise a normal user. However, the caps might got
683            corrupted due to the setresuid() so we need clean them up
684            later. This is done outside of this call. */
685
686         return 0;
687 }
688
689 #ifdef HAVE_PAM
690
691 static int null_conv(
692                 int num_msg,
693                 const struct pam_message **msg,
694                 struct pam_response **resp,
695                 void *appdata_ptr) {
696
697         /* We don't support conversations */
698
699         return PAM_CONV_ERR;
700 }
701
702 static int setup_pam(
703                 const char *name,
704                 const char *user,
705                 const char *tty,
706                 char ***pam_env,
707                 int fds[], unsigned n_fds) {
708
709         static const struct pam_conv conv = {
710                 .conv = null_conv,
711                 .appdata_ptr = NULL
712         };
713
714         pam_handle_t *handle = NULL;
715         sigset_t ss, old_ss;
716         int pam_code = PAM_SUCCESS;
717         int err;
718         char **e = NULL;
719         bool close_session = false;
720         pid_t pam_pid = 0, parent_pid;
721
722         assert(name);
723         assert(user);
724         assert(pam_env);
725
726         /* We set up PAM in the parent process, then fork. The child
727          * will then stay around until killed via PR_GET_PDEATHSIG or
728          * systemd via the cgroup logic. It will then remove the PAM
729          * session again. The parent process will exec() the actual
730          * daemon. We do things this way to ensure that the main PID
731          * of the daemon is the one we initially fork()ed. */
732
733         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
734                 handle = NULL;
735                 goto fail;
736         }
737
738         if (tty)
739                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
740                         goto fail;
741
742         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
743                 goto fail;
744
745         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
746                 goto fail;
747
748         close_session = true;
749
750         if ((!(e = pam_getenvlist(handle)))) {
751                 pam_code = PAM_BUF_ERR;
752                 goto fail;
753         }
754
755         /* Block SIGTERM, so that we know that it won't get lost in
756          * the child */
757         if (sigemptyset(&ss) < 0 ||
758             sigaddset(&ss, SIGTERM) < 0 ||
759             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
760                 goto fail;
761
762         parent_pid = getpid();
763
764         if ((pam_pid = fork()) < 0)
765                 goto fail;
766
767         if (pam_pid == 0) {
768                 int sig;
769                 int r = EXIT_PAM;
770
771                 /* The child's job is to reset the PAM session on
772                  * termination */
773
774                 /* This string must fit in 10 chars (i.e. the length
775                  * of "/sbin/init"), to look pretty in /bin/ps */
776                 rename_process("(sd-pam)");
777
778                 /* Make sure we don't keep open the passed fds in this
779                 child. We assume that otherwise only those fds are
780                 open here that have been opened by PAM. */
781                 close_many(fds, n_fds);
782
783                 /* Wait until our parent died. This will most likely
784                  * not work since the kernel does not allow
785                  * unprivileged parents kill their privileged children
786                  * this way. We rely on the control groups kill logic
787                  * to do the rest for us. */
788                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
789                         goto child_finish;
790
791                 /* Check if our parent process might already have
792                  * died? */
793                 if (getppid() == parent_pid) {
794                         for (;;) {
795                                 if (sigwait(&ss, &sig) < 0) {
796                                         if (errno == EINTR)
797                                                 continue;
798
799                                         goto child_finish;
800                                 }
801
802                                 assert(sig == SIGTERM);
803                                 break;
804                         }
805                 }
806
807                 /* If our parent died we'll end the session */
808                 if (getppid() != parent_pid)
809                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
810                                 goto child_finish;
811
812                 r = 0;
813
814         child_finish:
815                 pam_end(handle, pam_code | PAM_DATA_SILENT);
816                 _exit(r);
817         }
818
819         /* If the child was forked off successfully it will do all the
820          * cleanups, so forget about the handle here. */
821         handle = NULL;
822
823         /* Unblock SIGTERM again in the parent */
824         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
825                 goto fail;
826
827         /* We close the log explicitly here, since the PAM modules
828          * might have opened it, but we don't want this fd around. */
829         closelog();
830
831         *pam_env = e;
832         e = NULL;
833
834         return 0;
835
836 fail:
837         if (pam_code != PAM_SUCCESS)
838                 err = -EPERM;  /* PAM errors do not map to errno */
839         else
840                 err = -errno;
841
842         if (handle) {
843                 if (close_session)
844                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
845
846                 pam_end(handle, pam_code | PAM_DATA_SILENT);
847         }
848
849         strv_free(e);
850
851         closelog();
852
853         if (pam_pid > 1) {
854                 kill(pam_pid, SIGTERM);
855                 kill(pam_pid, SIGCONT);
856         }
857
858         return err;
859 }
860 #endif
861
862 static int do_capability_bounding_set_drop(uint64_t drop) {
863         unsigned long i;
864         cap_t old_cap = NULL, new_cap = NULL;
865         cap_flag_value_t fv;
866         int r;
867
868         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
869          * in the effective set (yes, the kernel drops that when
870          * executing init!), so get it back temporarily so that we can
871          * call PR_CAPBSET_DROP. */
872
873         old_cap = cap_get_proc();
874         if (!old_cap)
875                 return -errno;
876
877         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
878                 r = -errno;
879                 goto finish;
880         }
881
882         if (fv != CAP_SET) {
883                 static const cap_value_t v = CAP_SETPCAP;
884
885                 new_cap = cap_dup(old_cap);
886                 if (!new_cap) {
887                         r = -errno;
888                         goto finish;
889                 }
890
891                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
892                         r = -errno;
893                         goto finish;
894                 }
895
896                 if (cap_set_proc(new_cap) < 0) {
897                         r = -errno;
898                         goto finish;
899                 }
900         }
901
902         for (i = 0; i <= cap_last_cap(); i++)
903                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
904                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
905                                 r = -errno;
906                                 goto finish;
907                         }
908                 }
909
910         r = 0;
911
912 finish:
913         if (new_cap)
914                 cap_free(new_cap);
915
916         if (old_cap) {
917                 cap_set_proc(old_cap);
918                 cap_free(old_cap);
919         }
920
921         return r;
922 }
923
/* Renames the process to the file name part of path, wrapped in
 * parentheses. Only the last 8 characters of the name are used so that
 * the result fits into 10 characters (the length of "/sbin/init") and
 * looks pretty in /bin/ps. An empty name yields "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t len;

        base = file_name_from_path(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail, which tends to be more telling than
                 * the head (that might just be "systemd-") */
                base += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
954
955 int exec_spawn(ExecCommand *command,
956                char **argv,
957                const ExecContext *context,
958                int fds[], unsigned n_fds,
959                char **environment,
960                bool apply_permissions,
961                bool apply_chroot,
962                bool apply_tty_stdin,
963                bool confirm_spawn,
964                CGroupBonding *cgroup_bondings,
965                CGroupAttribute *cgroup_attributes,
966                const char *cgroup_suffix,
967                int idle_pipe[2],
968                pid_t *ret) {
969
970         pid_t pid;
971         int r;
972         char *line;
973         int socket_fd;
974         char **files_env = NULL;
975
976         assert(command);
977         assert(context);
978         assert(ret);
979         assert(fds || n_fds <= 0);
980
981         if (context->std_input == EXEC_INPUT_SOCKET ||
982             context->std_output == EXEC_OUTPUT_SOCKET ||
983             context->std_error == EXEC_OUTPUT_SOCKET) {
984
985                 if (n_fds != 1)
986                         return -EINVAL;
987
988                 socket_fd = fds[0];
989
990                 fds = NULL;
991                 n_fds = 0;
992         } else
993                 socket_fd = -1;
994
995         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
996                 log_error("Failed to load environment files: %s", strerror(-r));
997                 return r;
998         }
999
1000         if (!argv)
1001                 argv = command->argv;
1002
1003         if (!(line = exec_command_line(argv))) {
1004                 r = -ENOMEM;
1005                 goto fail_parent;
1006         }
1007
1008         log_debug("About to execute: %s", line);
1009         free(line);
1010
1011         r = cgroup_bonding_realize_list(cgroup_bondings);
1012         if (r < 0)
1013                 goto fail_parent;
1014
1015         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1016
1017         if ((pid = fork()) < 0) {
1018                 r = -errno;
1019                 goto fail_parent;
1020         }
1021
1022         if (pid == 0) {
1023                 int i, err;
1024                 sigset_t ss;
1025                 const char *username = NULL, *home = NULL;
1026                 uid_t uid = (uid_t) -1;
1027                 gid_t gid = (gid_t) -1;
1028                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1029                 unsigned n_env = 0;
1030                 int saved_stdout = -1, saved_stdin = -1;
1031                 bool keep_stdout = false, keep_stdin = false, set_access = false;
1032
1033                 /* child */
1034
1035                 rename_process_from_path(command->path);
1036
1037                 /* We reset exactly these signals, since they are the
1038                  * only ones we set to SIG_IGN in the main daemon. All
1039                  * others we leave untouched because we set them to
1040                  * SIG_DFL or a valid handler initially, both of which
1041                  * will be demoted to SIG_DFL. */
1042                 default_signals(SIGNALS_CRASH_HANDLER,
1043                                 SIGNALS_IGNORE, -1);
1044
1045                 if (context->ignore_sigpipe)
1046                         ignore_signals(SIGPIPE, -1);
1047
1048                 assert_se(sigemptyset(&ss) == 0);
1049                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1050                         err = -errno;
1051                         r = EXIT_SIGNAL_MASK;
1052                         goto fail_child;
1053                 }
1054
1055                 if (idle_pipe) {
1056                         if (idle_pipe[1] >= 0)
1057                                 close_nointr_nofail(idle_pipe[1]);
1058                         if (idle_pipe[0] >= 0) {
1059                                 fd_wait_for_event(idle_pipe[0], POLLHUP, DEFAULT_TIMEOUT_USEC);
1060                                 close_nointr_nofail(idle_pipe[0]);
1061                         }
1062                 }
1063
1064                 /* Close sockets very early to make sure we don't
1065                  * block init reexecution because it cannot bind its
1066                  * sockets */
1067                 log_forget_fds();
1068                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1069                                            socket_fd >= 0 ? 1 : n_fds);
1070                 if (err < 0) {
1071                         r = EXIT_FDS;
1072                         goto fail_child;
1073                 }
1074
1075                 if (!context->same_pgrp)
1076                         if (setsid() < 0) {
1077                                 err = -errno;
1078                                 r = EXIT_SETSID;
1079                                 goto fail_child;
1080                         }
1081
1082                 if (context->tcpwrap_name) {
1083                         if (socket_fd >= 0)
1084                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1085                                         err = -EACCES;
1086                                         r = EXIT_TCPWRAP;
1087                                         goto fail_child;
1088                                 }
1089
1090                         for (i = 0; i < (int) n_fds; i++) {
1091                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1092                                         err = -EACCES;
1093                                         r = EXIT_TCPWRAP;
1094                                         goto fail_child;
1095                                 }
1096                         }
1097                 }
1098
1099                 exec_context_tty_reset(context);
1100
1101                 /* We skip the confirmation step if we shall not apply the TTY */
1102                 if (confirm_spawn &&
1103                     (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1104                         char response;
1105
1106                         /* Set up terminal for the question */
1107                         if ((r = setup_confirm_stdio(context,
1108                                                      &saved_stdin, &saved_stdout))) {
1109                                 err = -errno;
1110                                 goto fail_child;
1111                         }
1112
1113                         /* Now ask the question. */
1114                         if (!(line = exec_command_line(argv))) {
1115                                 err = -ENOMEM;
1116                                 r = EXIT_MEMORY;
1117                                 goto fail_child;
1118                         }
1119
1120                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1121                         free(line);
1122
1123                         if (r < 0 || response == 'n') {
1124                                 err = -ECANCELED;
1125                                 r = EXIT_CONFIRM;
1126                                 goto fail_child;
1127                         } else if (response == 's') {
1128                                 err = r = 0;
1129                                 goto fail_child;
1130                         }
1131
1132                         /* Release terminal for the question */
1133                         if ((r = restore_confirm_stdio(context,
1134                                                        &saved_stdin, &saved_stdout,
1135                                                        &keep_stdin, &keep_stdout))) {
1136                                 err = -errno;
1137                                 goto fail_child;
1138                         }
1139                 }
1140
1141                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1142                  * must sure to drop O_NONBLOCK */
1143                 if (socket_fd >= 0)
1144                         fd_nonblock(socket_fd, false);
1145
1146                 if (!keep_stdin) {
1147                         err = setup_input(context, socket_fd, apply_tty_stdin);
1148                         if (err < 0) {
1149                                 r = EXIT_STDIN;
1150                                 goto fail_child;
1151                         }
1152                 }
1153
1154                 if (!keep_stdout) {
1155                         err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1156                         if (err < 0) {
1157                                 r = EXIT_STDOUT;
1158                                 goto fail_child;
1159                         }
1160                 }
1161
1162                 err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1163                 if (err < 0) {
1164                         r = EXIT_STDERR;
1165                         goto fail_child;
1166                 }
1167
1168                 if (cgroup_bondings) {
1169                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1170                         if (err < 0) {
1171                                 r = EXIT_CGROUP;
1172                                 goto fail_child;
1173                         }
1174                 }
1175
1176                 if (context->oom_score_adjust_set) {
1177                         char t[16];
1178
1179                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1180                         char_array_0(t);
1181
1182                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1183                                 /* Compatibility with Linux <= 2.6.35 */
1184
1185                                 int adj;
1186
1187                                 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1188                                 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1189
1190                                 snprintf(t, sizeof(t), "%i", adj);
1191                                 char_array_0(t);
1192
1193                                 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1194                                     && errno != EACCES) {
1195                                         err = -errno;
1196                                         r = EXIT_OOM_ADJUST;
1197                                         goto fail_child;
1198                                 }
1199                         }
1200                 }
1201
1202                 if (context->nice_set)
1203                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1204                                 err = -errno;
1205                                 r = EXIT_NICE;
1206                                 goto fail_child;
1207                         }
1208
1209                 if (context->cpu_sched_set) {
1210                         struct sched_param param;
1211
1212                         zero(param);
1213                         param.sched_priority = context->cpu_sched_priority;
1214
1215                         if (sched_setscheduler(0, context->cpu_sched_policy |
1216                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1217                                 err = -errno;
1218                                 r = EXIT_SETSCHEDULER;
1219                                 goto fail_child;
1220                         }
1221                 }
1222
1223                 if (context->cpuset)
1224                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1225                                 err = -errno;
1226                                 r = EXIT_CPUAFFINITY;
1227                                 goto fail_child;
1228                         }
1229
1230                 if (context->ioprio_set)
1231                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1232                                 err = -errno;
1233                                 r = EXIT_IOPRIO;
1234                                 goto fail_child;
1235                         }
1236
1237                 if (context->timer_slack_nsec_set)
1238                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1239                                 err = -errno;
1240                                 r = EXIT_TIMERSLACK;
1241                                 goto fail_child;
1242                         }
1243
1244                 if (context->utmp_id)
1245                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1246
1247                 if (context->user) {
1248                         username = context->user;
1249                         err = get_user_creds(&username, &uid, &gid, &home);
1250                         if (err < 0) {
1251                                 r = EXIT_USER;
1252                                 goto fail_child;
1253                         }
1254
1255                         if (is_terminal_input(context->std_input)) {
1256                                 err = chown_terminal(STDIN_FILENO, uid);
1257                                 if (err < 0) {
1258                                         r = EXIT_STDIN;
1259                                         goto fail_child;
1260                                 }
1261                         }
1262
1263                         if (cgroup_bondings && context->control_group_modify) {
1264                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1265                                 if (err >= 0)
1266                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1267                                 if (err < 0) {
1268                                         r = EXIT_CGROUP;
1269                                         goto fail_child;
1270                                 }
1271
1272                                 set_access = true;
1273                         }
1274                 }
1275
1276                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1277                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1278                         if (err < 0) {
1279                                 r = EXIT_CGROUP;
1280                                 goto fail_child;
1281                         }
1282                 }
1283
1284                 if (apply_permissions) {
1285                         err = enforce_groups(context, username, gid);
1286                         if (err < 0) {
1287                                 r = EXIT_GROUP;
1288                                 goto fail_child;
1289                         }
1290                 }
1291
1292                 umask(context->umask);
1293
1294 #ifdef HAVE_PAM
1295                 if (context->pam_name && username) {
1296                         err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1297                         if (err < 0) {
1298                                 r = EXIT_PAM;
1299                                 goto fail_child;
1300                         }
1301                 }
1302 #endif
1303                 if (context->private_network) {
1304                         if (unshare(CLONE_NEWNET) < 0) {
1305                                 err = -errno;
1306                                 r = EXIT_NETWORK;
1307                                 goto fail_child;
1308                         }
1309
1310                         loopback_setup();
1311                 }
1312
1313                 if (strv_length(context->read_write_dirs) > 0 ||
1314                     strv_length(context->read_only_dirs) > 0 ||
1315                     strv_length(context->inaccessible_dirs) > 0 ||
1316                     context->mount_flags != MS_SHARED ||
1317                     context->private_tmp) {
1318                         err = setup_namespace(context->read_write_dirs,
1319                                               context->read_only_dirs,
1320                                               context->inaccessible_dirs,
1321                                               context->private_tmp,
1322                                               context->mount_flags);
1323                         if (err < 0) {
1324                                 r = EXIT_NAMESPACE;
1325                                 goto fail_child;
1326                         }
1327                 }
1328
1329                 if (apply_chroot) {
1330                         if (context->root_directory)
1331                                 if (chroot(context->root_directory) < 0) {
1332                                         err = -errno;
1333                                         r = EXIT_CHROOT;
1334                                         goto fail_child;
1335                                 }
1336
1337                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1338                                 err = -errno;
1339                                 r = EXIT_CHDIR;
1340                                 goto fail_child;
1341                         }
1342                 } else {
1343
1344                         char *d;
1345
1346                         if (asprintf(&d, "%s/%s",
1347                                      context->root_directory ? context->root_directory : "",
1348                                      context->working_directory ? context->working_directory : "") < 0) {
1349                                 err = -ENOMEM;
1350                                 r = EXIT_MEMORY;
1351                                 goto fail_child;
1352                         }
1353
1354                         if (chdir(d) < 0) {
1355                                 err = -errno;
1356                                 free(d);
1357                                 r = EXIT_CHDIR;
1358                                 goto fail_child;
1359                         }
1360
1361                         free(d);
1362                 }
1363
1364                 /* We repeat the fd closing here, to make sure that
1365                  * nothing is leaked from the PAM modules */
1366                 err = close_all_fds(fds, n_fds);
1367                 if (err >= 0)
1368                         err = shift_fds(fds, n_fds);
1369                 if (err >= 0)
1370                         err = flags_fds(fds, n_fds, context->non_blocking);
1371                 if (err < 0) {
1372                         r = EXIT_FDS;
1373                         goto fail_child;
1374                 }
1375
1376                 if (apply_permissions) {
1377
1378                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1379                                 if (!context->rlimit[i])
1380                                         continue;
1381
1382                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1383                                         err = -errno;
1384                                         r = EXIT_LIMITS;
1385                                         goto fail_child;
1386                                 }
1387                         }
1388
1389                         if (context->capability_bounding_set_drop) {
1390                                 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1391                                 if (err < 0) {
1392                                         r = EXIT_CAPABILITIES;
1393                                         goto fail_child;
1394                                 }
1395                         }
1396
1397                         if (context->user) {
1398                                 err = enforce_user(context, uid);
1399                                 if (err < 0) {
1400                                         r = EXIT_USER;
1401                                         goto fail_child;
1402                                 }
1403                         }
1404
1405                         /* PR_GET_SECUREBITS is not privileged, while
1406                          * PR_SET_SECUREBITS is. So to suppress
1407                          * potential EPERMs we'll try not to call
1408                          * PR_SET_SECUREBITS unless necessary. */
1409                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1410                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1411                                         err = -errno;
1412                                         r = EXIT_SECUREBITS;
1413                                         goto fail_child;
1414                                 }
1415
1416                         if (context->capabilities)
1417                                 if (cap_set_proc(context->capabilities) < 0) {
1418                                         err = -errno;
1419                                         r = EXIT_CAPABILITIES;
1420                                         goto fail_child;
1421                                 }
1422                 }
1423
1424                 if (!(our_env = new0(char*, 7))) {
1425                         err = -ENOMEM;
1426                         r = EXIT_MEMORY;
1427                         goto fail_child;
1428                 }
1429
1430                 if (n_fds > 0)
1431                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1432                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1433                                 err = -ENOMEM;
1434                                 r = EXIT_MEMORY;
1435                                 goto fail_child;
1436                         }
1437
1438                 if (home)
1439                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1440                                 err = -ENOMEM;
1441                                 r = EXIT_MEMORY;
1442                                 goto fail_child;
1443                         }
1444
1445                 if (username)
1446                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1447                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1448                                 err = -ENOMEM;
1449                                 r = EXIT_MEMORY;
1450                                 goto fail_child;
1451                         }
1452
1453                 if (is_terminal_input(context->std_input) ||
1454                     context->std_output == EXEC_OUTPUT_TTY ||
1455                     context->std_error == EXEC_OUTPUT_TTY)
1456                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1457                                 err = -ENOMEM;
1458                                 r = EXIT_MEMORY;
1459                                 goto fail_child;
1460                         }
1461
1462                 assert(n_env <= 7);
1463
1464                 if (!(final_env = strv_env_merge(
1465                                       5,
1466                                       environment,
1467                                       our_env,
1468                                       context->environment,
1469                                       files_env,
1470                                       pam_env,
1471                                       NULL))) {
1472                         err = -ENOMEM;
1473                         r = EXIT_MEMORY;
1474                         goto fail_child;
1475                 }
1476
1477                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1478                         err = -ENOMEM;
1479                         r = EXIT_MEMORY;
1480                         goto fail_child;
1481                 }
1482
1483                 final_env = strv_env_clean(final_env);
1484
1485                 execve(command->path, final_argv, final_env);
1486                 err = -errno;
1487                 r = EXIT_EXEC;
1488
1489         fail_child:
1490                 if (r != 0) {
1491                         log_open();
1492                         log_warning("Failed at step %s spawning %s: %s",
1493                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1494                                     command->path, strerror(-err));
1495                 }
1496
1497                 strv_free(our_env);
1498                 strv_free(final_env);
1499                 strv_free(pam_env);
1500                 strv_free(files_env);
1501                 strv_free(final_argv);
1502
1503                 if (saved_stdin >= 0)
1504                         close_nointr_nofail(saved_stdin);
1505
1506                 if (saved_stdout >= 0)
1507                         close_nointr_nofail(saved_stdout);
1508
1509                 _exit(r);
1510         }
1511
1512         strv_free(files_env);
1513
1514         /* We add the new process to the cgroup both in the child (so
1515          * that we can be sure that no user code is ever executed
1516          * outside of the cgroup) and in the parent (so that we can be
1517          * sure that when we kill the cgroup the process will be
1518          * killed too). */
1519         if (cgroup_bondings)
1520                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1521
1522         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1523
1524         exec_status_start(&command->exec_status, pid);
1525
1526         *ret = pid;
1527         return 0;
1528
1529 fail_parent:
1530         strv_free(files_env);
1531
1532         return r;
1533 }
1534
1535 void exec_context_init(ExecContext *c) {
1536         assert(c);
1537
1538         c->umask = 0022;
1539         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540         c->cpu_sched_policy = SCHED_OTHER;
1541         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542         c->syslog_level_prefix = true;
1543         c->mount_flags = MS_SHARED;
1544         c->kill_signal = SIGTERM;
1545         c->send_sigkill = true;
1546         c->control_group_persistent = -1;
1547         c->ignore_sigpipe = true;
1548 }
1549
1550 void exec_context_done(ExecContext *c) {
1551         unsigned l;
1552
1553         assert(c);
1554
1555         strv_free(c->environment);
1556         c->environment = NULL;
1557
1558         strv_free(c->environment_files);
1559         c->environment_files = NULL;
1560
1561         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1562                 free(c->rlimit[l]);
1563                 c->rlimit[l] = NULL;
1564         }
1565
1566         free(c->working_directory);
1567         c->working_directory = NULL;
1568         free(c->root_directory);
1569         c->root_directory = NULL;
1570
1571         free(c->tty_path);
1572         c->tty_path = NULL;
1573
1574         free(c->tcpwrap_name);
1575         c->tcpwrap_name = NULL;
1576
1577         free(c->syslog_identifier);
1578         c->syslog_identifier = NULL;
1579
1580         free(c->user);
1581         c->user = NULL;
1582
1583         free(c->group);
1584         c->group = NULL;
1585
1586         strv_free(c->supplementary_groups);
1587         c->supplementary_groups = NULL;
1588
1589         free(c->pam_name);
1590         c->pam_name = NULL;
1591
1592         if (c->capabilities) {
1593                 cap_free(c->capabilities);
1594                 c->capabilities = NULL;
1595         }
1596
1597         strv_free(c->read_only_dirs);
1598         c->read_only_dirs = NULL;
1599
1600         strv_free(c->read_write_dirs);
1601         c->read_write_dirs = NULL;
1602
1603         strv_free(c->inaccessible_dirs);
1604         c->inaccessible_dirs = NULL;
1605
1606         if (c->cpuset)
1607                 CPU_FREE(c->cpuset);
1608
1609         free(c->utmp_id);
1610         c->utmp_id = NULL;
1611 }
1612
1613 void exec_command_done(ExecCommand *c) {
1614         assert(c);
1615
1616         free(c->path);
1617         c->path = NULL;
1618
1619         strv_free(c->argv);
1620         c->argv = NULL;
1621 }
1622
1623 void exec_command_done_array(ExecCommand *c, unsigned n) {
1624         unsigned i;
1625
1626         for (i = 0; i < n; i++)
1627                 exec_command_done(c+i);
1628 }
1629
1630 void exec_command_free_list(ExecCommand *c) {
1631         ExecCommand *i;
1632
1633         while ((i = c)) {
1634                 LIST_REMOVE(ExecCommand, command, c, i);
1635                 exec_command_done(i);
1636                 free(i);
1637         }
1638 }
1639
1640 void exec_command_free_array(ExecCommand **c, unsigned n) {
1641         unsigned i;
1642
1643         for (i = 0; i < n; i++) {
1644                 exec_command_free_list(c[i]);
1645                 c[i] = NULL;
1646         }
1647 }
1648
1649 int exec_context_load_environment(const ExecContext *c, char ***l) {
1650         char **i, **r = NULL;
1651
1652         assert(c);
1653         assert(l);
1654
1655         STRV_FOREACH(i, c->environment_files) {
1656                 char *fn;
1657                 int k;
1658                 bool ignore = false;
1659                 char **p;
1660
1661                 fn = *i;
1662
1663                 if (fn[0] == '-') {
1664                         ignore = true;
1665                         fn ++;
1666                 }
1667
1668                 if (!path_is_absolute(fn)) {
1669
1670                         if (ignore)
1671                                 continue;
1672
1673                         strv_free(r);
1674                         return -EINVAL;
1675                 }
1676
1677                 if ((k = load_env_file(fn, &p)) < 0) {
1678
1679                         if (ignore)
1680                                 continue;
1681
1682                         strv_free(r);
1683                         return k;
1684                 }
1685
1686                 if (r == NULL)
1687                         r = p;
1688                 else {
1689                         char **m;
1690
1691                         m = strv_env_merge(2, r, p);
1692                         strv_free(r);
1693                         strv_free(p);
1694
1695                         if (!m)
1696                                 return -ENOMEM;
1697
1698                         r = m;
1699                 }
1700         }
1701
1702         *l = r;
1703
1704         return 0;
1705 }
1706
1707 static void strv_fprintf(FILE *f, char **l) {
1708         char **g;
1709
1710         assert(f);
1711
1712         STRV_FOREACH(g, l)
1713                 fprintf(f, " %s", *g);
1714 }
1715
1716 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1717         char ** e;
1718         unsigned i;
1719
1720         assert(c);
1721         assert(f);
1722
1723         if (!prefix)
1724                 prefix = "";
1725
1726         fprintf(f,
1727                 "%sUMask: %04o\n"
1728                 "%sWorkingDirectory: %s\n"
1729                 "%sRootDirectory: %s\n"
1730                 "%sNonBlocking: %s\n"
1731                 "%sPrivateTmp: %s\n"
1732                 "%sControlGroupModify: %s\n"
1733                 "%sControlGroupPersistent: %s\n"
1734                 "%sPrivateNetwork: %s\n",
1735                 prefix, c->umask,
1736                 prefix, c->working_directory ? c->working_directory : "/",
1737                 prefix, c->root_directory ? c->root_directory : "/",
1738                 prefix, yes_no(c->non_blocking),
1739                 prefix, yes_no(c->private_tmp),
1740                 prefix, yes_no(c->control_group_modify),
1741                 prefix, yes_no(c->control_group_persistent),
1742                 prefix, yes_no(c->private_network));
1743
1744         STRV_FOREACH(e, c->environment)
1745                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1746
1747         STRV_FOREACH(e, c->environment_files)
1748                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1749
1750         if (c->tcpwrap_name)
1751                 fprintf(f,
1752                         "%sTCPWrapName: %s\n",
1753                         prefix, c->tcpwrap_name);
1754
1755         if (c->nice_set)
1756                 fprintf(f,
1757                         "%sNice: %i\n",
1758                         prefix, c->nice);
1759
1760         if (c->oom_score_adjust_set)
1761                 fprintf(f,
1762                         "%sOOMScoreAdjust: %i\n",
1763                         prefix, c->oom_score_adjust);
1764
1765         for (i = 0; i < RLIM_NLIMITS; i++)
1766                 if (c->rlimit[i])
1767                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1768
1769         if (c->ioprio_set)
1770                 fprintf(f,
1771                         "%sIOSchedulingClass: %s\n"
1772                         "%sIOPriority: %i\n",
1773                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1774                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1775
1776         if (c->cpu_sched_set)
1777                 fprintf(f,
1778                         "%sCPUSchedulingPolicy: %s\n"
1779                         "%sCPUSchedulingPriority: %i\n"
1780                         "%sCPUSchedulingResetOnFork: %s\n",
1781                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1782                         prefix, c->cpu_sched_priority,
1783                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1784
1785         if (c->cpuset) {
1786                 fprintf(f, "%sCPUAffinity:", prefix);
1787                 for (i = 0; i < c->cpuset_ncpus; i++)
1788                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1789                                 fprintf(f, " %i", i);
1790                 fputs("\n", f);
1791         }
1792
1793         if (c->timer_slack_nsec_set)
1794                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1795
1796         fprintf(f,
1797                 "%sStandardInput: %s\n"
1798                 "%sStandardOutput: %s\n"
1799                 "%sStandardError: %s\n",
1800                 prefix, exec_input_to_string(c->std_input),
1801                 prefix, exec_output_to_string(c->std_output),
1802                 prefix, exec_output_to_string(c->std_error));
1803
1804         if (c->tty_path)
1805                 fprintf(f,
1806                         "%sTTYPath: %s\n"
1807                         "%sTTYReset: %s\n"
1808                         "%sTTYVHangup: %s\n"
1809                         "%sTTYVTDisallocate: %s\n",
1810                         prefix, c->tty_path,
1811                         prefix, yes_no(c->tty_reset),
1812                         prefix, yes_no(c->tty_vhangup),
1813                         prefix, yes_no(c->tty_vt_disallocate));
1814
1815         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1816             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1817             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1818             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1819                 fprintf(f,
1820                         "%sSyslogFacility: %s\n"
1821                         "%sSyslogLevel: %s\n",
1822                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1823                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1824
1825         if (c->capabilities) {
1826                 char *t;
1827                 if ((t = cap_to_text(c->capabilities, NULL))) {
1828                         fprintf(f, "%sCapabilities: %s\n",
1829                                 prefix, t);
1830                         cap_free(t);
1831                 }
1832         }
1833
1834         if (c->secure_bits)
1835                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1836                         prefix,
1837                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1838                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1839                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1840                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1841                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1842                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1843
1844         if (c->capability_bounding_set_drop) {
1845                 unsigned long l;
1846                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1847
1848                 for (l = 0; l <= cap_last_cap(); l++)
1849                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1850                                 char *t;
1851
1852                                 if ((t = cap_to_name(l))) {
1853                                         fprintf(f, " %s", t);
1854                                         cap_free(t);
1855                                 }
1856                         }
1857
1858                 fputs("\n", f);
1859         }
1860
1861         if (c->user)
1862                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1863         if (c->group)
1864                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1865
1866         if (strv_length(c->supplementary_groups) > 0) {
1867                 fprintf(f, "%sSupplementaryGroups:", prefix);
1868                 strv_fprintf(f, c->supplementary_groups);
1869                 fputs("\n", f);
1870         }
1871
1872         if (c->pam_name)
1873                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1874
1875         if (strv_length(c->read_write_dirs) > 0) {
1876                 fprintf(f, "%sReadWriteDirs:", prefix);
1877                 strv_fprintf(f, c->read_write_dirs);
1878                 fputs("\n", f);
1879         }
1880
1881         if (strv_length(c->read_only_dirs) > 0) {
1882                 fprintf(f, "%sReadOnlyDirs:", prefix);
1883                 strv_fprintf(f, c->read_only_dirs);
1884                 fputs("\n", f);
1885         }
1886
1887         if (strv_length(c->inaccessible_dirs) > 0) {
1888                 fprintf(f, "%sInaccessibleDirs:", prefix);
1889                 strv_fprintf(f, c->inaccessible_dirs);
1890                 fputs("\n", f);
1891         }
1892
1893         fprintf(f,
1894                 "%sKillMode: %s\n"
1895                 "%sKillSignal: SIG%s\n"
1896                 "%sSendSIGKILL: %s\n"
1897                 "%sIgnoreSIGPIPE: %s\n",
1898                 prefix, kill_mode_to_string(c->kill_mode),
1899                 prefix, signal_to_string(c->kill_signal),
1900                 prefix, yes_no(c->send_sigkill),
1901                 prefix, yes_no(c->ignore_sigpipe));
1902
1903         if (c->utmp_id)
1904                 fprintf(f,
1905                         "%sUtmpIdentifier: %s\n",
1906                         prefix, c->utmp_id);
1907 }
1908
1909 void exec_status_start(ExecStatus *s, pid_t pid) {
1910         assert(s);
1911
1912         zero(*s);
1913         s->pid = pid;
1914         dual_timestamp_get(&s->start_timestamp);
1915 }
1916
1917 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1918         assert(s);
1919
1920         if (s->pid && s->pid != pid)
1921                 zero(*s);
1922
1923         s->pid = pid;
1924         dual_timestamp_get(&s->exit_timestamp);
1925
1926         s->code = code;
1927         s->status = status;
1928
1929         if (context) {
1930                 if (context->utmp_id)
1931                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1932
1933                 exec_context_tty_reset(context);
1934         }
1935 }
1936
1937 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1938         char buf[FORMAT_TIMESTAMP_MAX];
1939
1940         assert(s);
1941         assert(f);
1942
1943         if (!prefix)
1944                 prefix = "";
1945
1946         if (s->pid <= 0)
1947                 return;
1948
1949         fprintf(f,
1950                 "%sPID: %lu\n",
1951                 prefix, (unsigned long) s->pid);
1952
1953         if (s->start_timestamp.realtime > 0)
1954                 fprintf(f,
1955                         "%sStart Timestamp: %s\n",
1956                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1957
1958         if (s->exit_timestamp.realtime > 0)
1959                 fprintf(f,
1960                         "%sExit Timestamp: %s\n"
1961                         "%sExit Code: %s\n"
1962                         "%sExit Status: %i\n",
1963                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1964                         prefix, sigchld_code_to_string(s->code),
1965                         prefix, s->status);
1966 }
1967
1968 char *exec_command_line(char **argv) {
1969         size_t k;
1970         char *n, *p, **a;
1971         bool first = true;
1972
1973         assert(argv);
1974
1975         k = 1;
1976         STRV_FOREACH(a, argv)
1977                 k += strlen(*a)+3;
1978
1979         if (!(n = new(char, k)))
1980                 return NULL;
1981
1982         p = n;
1983         STRV_FOREACH(a, argv) {
1984
1985                 if (!first)
1986                         *(p++) = ' ';
1987                 else
1988                         first = false;
1989
1990                 if (strpbrk(*a, WHITESPACE)) {
1991                         *(p++) = '\'';
1992                         p = stpcpy(p, *a);
1993                         *(p++) = '\'';
1994                 } else
1995                         p = stpcpy(p, *a);
1996
1997         }
1998
1999         *p = 0;
2000
2001         /* FIXME: this doesn't really handle arguments that have
2002          * spaces and ticks in them */
2003
2004         return n;
2005 }
2006
2007 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2008         char *p2;
2009         const char *prefix2;
2010
2011         char *cmd;
2012
2013         assert(c);
2014         assert(f);
2015
2016         if (!prefix)
2017                 prefix = "";
2018         p2 = strappend(prefix, "\t");
2019         prefix2 = p2 ? p2 : prefix;
2020
2021         cmd = exec_command_line(c->argv);
2022
2023         fprintf(f,
2024                 "%sCommand Line: %s\n",
2025                 prefix, cmd ? cmd : strerror(ENOMEM));
2026
2027         free(cmd);
2028
2029         exec_status_dump(&c->exec_status, f, prefix2);
2030
2031         free(p2);
2032 }
2033
2034 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2035         assert(f);
2036
2037         if (!prefix)
2038                 prefix = "";
2039
2040         LIST_FOREACH(command, c, c)
2041                 exec_command_dump(c, f, prefix);
2042 }
2043
2044 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2045         ExecCommand *end;
2046
2047         assert(l);
2048         assert(e);
2049
2050         if (*l) {
2051                 /* It's kind of important, that we keep the order here */
2052                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2053                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2054         } else
2055               *l = e;
2056 }
2057
2058 int exec_command_set(ExecCommand *c, const char *path, ...) {
2059         va_list ap;
2060         char **l, *p;
2061
2062         assert(c);
2063         assert(path);
2064
2065         va_start(ap, path);
2066         l = strv_new_ap(path, ap);
2067         va_end(ap);
2068
2069         if (!l)
2070                 return -ENOMEM;
2071
2072         if (!(p = strdup(path))) {
2073                 strv_free(l);
2074                 return -ENOMEM;
2075         }
2076
2077         free(c->path);
2078         c->path = p;
2079
2080         strv_free(c->argv);
2081         c->argv = l;
2082
2083         return 0;
2084 }
2085
2086 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2087         [EXEC_INPUT_NULL] = "null",
2088         [EXEC_INPUT_TTY] = "tty",
2089         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2090         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2091         [EXEC_INPUT_SOCKET] = "socket"
2092 };
2093
2094 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2095
2096 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2097         [EXEC_OUTPUT_INHERIT] = "inherit",
2098         [EXEC_OUTPUT_NULL] = "null",
2099         [EXEC_OUTPUT_TTY] = "tty",
2100         [EXEC_OUTPUT_SYSLOG] = "syslog",
2101         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2102         [EXEC_OUTPUT_KMSG] = "kmsg",
2103         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2104         [EXEC_OUTPUT_JOURNAL] = "journal",
2105         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2106         [EXEC_OUTPUT_SOCKET] = "socket"
2107 };
2108
2109 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2110
2111 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2112         [KILL_CONTROL_GROUP] = "control-group",
2113         [KILL_PROCESS] = "process",
2114         [KILL_NONE] = "none"
2115 };
2116
2117 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2118
2119 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2120         [KILL_MAIN] = "main",
2121         [KILL_CONTROL] = "control",
2122         [KILL_ALL] = "all"
2123 };
2124
2125 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);