chiark / gitweb /
core/execute: use cleanup_strv_free
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42
43 #ifdef HAVE_PAM
44 #include <security/pam_appl.h>
45 #endif
46
47 #include "execute.h"
48 #include "strv.h"
49 #include "macro.h"
50 #include "capability.h"
51 #include "util.h"
52 #include "log.h"
53 #include "ioprio.h"
54 #include "securebits.h"
55 #include "cgroup.h"
56 #include "namespace.h"
57 #include "tcpwrap.h"
58 #include "exit-status.h"
59 #include "missing.h"
60 #include "utmp-wtmp.h"
61 #include "def.h"
62 #include "loopback-setup.h"
63 #include "path-util.h"
64 #include "syscall-list.h"
65
66 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
67
68 /* This assumes there is a 'tty' group */
69 #define TTY_MODE 0620
70
/* Moves the n_fds descriptors in fds[] so that entry i ends up as file
 * descriptor i+3, i.e. directly behind stdin/stdout/stderr.
 *
 * The array is updated in place with the new descriptor numbers.
 * Returns 0 on success or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted. If
                         * the slot was still occupied we got something else,
                         * so note the first such index and make another pass
                         * starting there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
114
/* Applies the requested O_NONBLOCK state to each of the n_fds descriptors
 * and clears FD_CLOEXEC on them.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always turned off: these descriptors are
                 * meant to be handed over to the child we execute. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
141
142 static const char *tty_path(const ExecContext *context) {
143         assert(context);
144
145         if (context->tty_path)
146                 return context->tty_path;
147
148         return "/dev/console";
149 }
150
151 void exec_context_tty_reset(const ExecContext *context) {
152         assert(context);
153
154         if (context->tty_vhangup)
155                 terminal_vhangup(tty_path(context));
156
157         if (context->tty_reset)
158                 reset_terminal(tty_path(context));
159
160         if (context->tty_vt_disallocate && context->tty_path)
161                 vt_disallocate(context->tty_path);
162 }
163
/* Opens /dev/null with the given access flags and installs it as file
 * descriptor nfd.
 *
 * Returns nfd on success, a negative errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return result;
}
180
181 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
182         int fd, r;
183         union sockaddr_union sa;
184
185         assert(context);
186         assert(output < _EXEC_OUTPUT_MAX);
187         assert(ident);
188         assert(nfd >= 0);
189
190         fd = socket(AF_UNIX, SOCK_STREAM, 0);
191         if (fd < 0)
192                 return -errno;
193
194         zero(sa);
195         sa.un.sun_family = AF_UNIX;
196         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
197
198         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
199         if (r < 0) {
200                 close_nointr_nofail(fd);
201                 return -errno;
202         }
203
204         if (shutdown(fd, SHUT_RD) < 0) {
205                 close_nointr_nofail(fd);
206                 return -errno;
207         }
208
209         dprintf(fd,
210                 "%s\n"
211                 "%s\n"
212                 "%i\n"
213                 "%i\n"
214                 "%i\n"
215                 "%i\n"
216                 "%i\n",
217                 context->syslog_identifier ? context->syslog_identifier : ident,
218                 unit_id,
219                 context->syslog_priority,
220                 !!context->syslog_level_prefix,
221                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
222                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
223                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
224
225         if (fd != nfd) {
226                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
227                 close_nointr_nofail(fd);
228         } else
229                 r = nfd;
230
231         return r;
232 }
/* Opens the terminal at path (never as controlling TTY) and installs it as
 * file descriptor nfd.
 *
 * Returns nfd on success, otherwise the negative error from open_terminal()
 * or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(tty_fd);

        return result;
}
250
251 static bool is_terminal_input(ExecInput i) {
252         return
253                 i == EXEC_INPUT_TTY ||
254                 i == EXEC_INPUT_TTY_FORCE ||
255                 i == EXEC_INPUT_TTY_FAIL;
256 }
257
258 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
259
260         if (is_terminal_input(std_input) && !apply_tty_stdin)
261                 return EXEC_INPUT_NULL;
262
263         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
264                 return EXEC_INPUT_NULL;
265
266         return std_input;
267 }
268
269 static int fixup_output(ExecOutput std_output, int socket_fd) {
270
271         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
272                 return EXEC_OUTPUT_INHERIT;
273
274         return std_output;
275 }
276
277 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
278         ExecInput i;
279
280         assert(context);
281
282         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
283
284         switch (i) {
285
286         case EXEC_INPUT_NULL:
287                 return open_null_as(O_RDONLY, STDIN_FILENO);
288
289         case EXEC_INPUT_TTY:
290         case EXEC_INPUT_TTY_FORCE:
291         case EXEC_INPUT_TTY_FAIL: {
292                 int fd, r;
293
294                 if ((fd = acquire_terminal(
295                                      tty_path(context),
296                                      i == EXEC_INPUT_TTY_FAIL,
297                                      i == EXEC_INPUT_TTY_FORCE,
298                                      false,
299                                      (usec_t) -1)) < 0)
300                         return fd;
301
302                 if (fd != STDIN_FILENO) {
303                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
304                         close_nointr_nofail(fd);
305                 } else
306                         r = STDIN_FILENO;
307
308                 return r;
309         }
310
311         case EXEC_INPUT_SOCKET:
312                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
313
314         default:
315                 assert_not_reached("Unknown input type");
316         }
317 }
318
319 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
320         ExecOutput o;
321         ExecInput i;
322
323         assert(context);
324         assert(ident);
325
326         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
327         o = fixup_output(context->std_output, socket_fd);
328
329         /* This expects the input is already set up */
330
331         switch (o) {
332
333         case EXEC_OUTPUT_INHERIT:
334
335                 /* If input got downgraded, inherit the original value */
336                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
337                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
338
339                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
340                 if (i != EXEC_INPUT_NULL)
341                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
342
343                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
344                 if (getppid() != 1)
345                         return STDOUT_FILENO;
346
347                 /* We need to open /dev/null here anew, to get the
348                  * right access mode. So we fall through */
349
350         case EXEC_OUTPUT_NULL:
351                 return open_null_as(O_WRONLY, STDOUT_FILENO);
352
353         case EXEC_OUTPUT_TTY:
354                 if (is_terminal_input(i))
355                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
356
357                 /* We don't reset the terminal if this is just about output */
358                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
359
360         case EXEC_OUTPUT_SYSLOG:
361         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
362         case EXEC_OUTPUT_KMSG:
363         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
364         case EXEC_OUTPUT_JOURNAL:
365         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
366                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
367
368         case EXEC_OUTPUT_SOCKET:
369                 assert(socket_fd >= 0);
370                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
371
372         default:
373                 assert_not_reached("Unknown output type");
374         }
375 }
376
377 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
378         ExecOutput o, e;
379         ExecInput i;
380
381         assert(context);
382         assert(ident);
383
384         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
385         o = fixup_output(context->std_output, socket_fd);
386         e = fixup_output(context->std_error, socket_fd);
387
388         /* This expects the input and output are already set up */
389
390         /* Don't change the stderr file descriptor if we inherit all
391          * the way and are not on a tty */
392         if (e == EXEC_OUTPUT_INHERIT &&
393             o == EXEC_OUTPUT_INHERIT &&
394             i == EXEC_INPUT_NULL &&
395             !is_terminal_input(context->std_input) &&
396             getppid () != 1)
397                 return STDERR_FILENO;
398
399         /* Duplicate from stdout if possible */
400         if (e == o || e == EXEC_OUTPUT_INHERIT)
401                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
402
403         switch (e) {
404
405         case EXEC_OUTPUT_NULL:
406                 return open_null_as(O_WRONLY, STDERR_FILENO);
407
408         case EXEC_OUTPUT_TTY:
409                 if (is_terminal_input(i))
410                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
411
412                 /* We don't reset the terminal if this is just about output */
413                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
414
415         case EXEC_OUTPUT_SYSLOG:
416         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
417         case EXEC_OUTPUT_KMSG:
418         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
419         case EXEC_OUTPUT_JOURNAL:
420         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
421                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
422
423         case EXEC_OUTPUT_SOCKET:
424                 assert(socket_fd >= 0);
425                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
426
427         default:
428                 assert_not_reached("Unknown error type");
429         }
430 }
431
432 static int chown_terminal(int fd, uid_t uid) {
433         struct stat st;
434
435         assert(fd >= 0);
436
437         /* This might fail. What matters are the results. */
438         (void) fchown(fd, uid, -1);
439         (void) fchmod(fd, TTY_MODE);
440
441         if (fstat(fd, &st) < 0)
442                 return -errno;
443
444         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
445                 return -EPERM;
446
447         return 0;
448 }
449
450 static int setup_confirm_stdio(int *_saved_stdin,
451                                int *_saved_stdout) {
452         int fd = -1, saved_stdin, saved_stdout = -1, r;
453
454         assert(_saved_stdin);
455         assert(_saved_stdout);
456
457         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
458         if (saved_stdin < 0)
459                 return -errno;
460
461         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
462         if (saved_stdout < 0) {
463                 r = errno;
464                 goto fail;
465         }
466
467         fd = acquire_terminal(
468                         "/dev/console",
469                         false,
470                         false,
471                         false,
472                         DEFAULT_CONFIRM_USEC);
473         if (fd < 0) {
474                 r = fd;
475                 goto fail;
476         }
477
478         r = chown_terminal(fd, getuid());
479         if (r < 0)
480                 goto fail;
481
482         if (dup2(fd, STDIN_FILENO) < 0) {
483                 r = -errno;
484                 goto fail;
485         }
486
487         if (dup2(fd, STDOUT_FILENO) < 0) {
488                 r = -errno;
489                 goto fail;
490         }
491
492         if (fd >= 2)
493                 close_nointr_nofail(fd);
494
495         *_saved_stdin = saved_stdin;
496         *_saved_stdout = saved_stdout;
497
498         return 0;
499
500 fail:
501         if (saved_stdout >= 0)
502                 close_nointr_nofail(saved_stdout);
503
504         if (saved_stdin >= 0)
505                 close_nointr_nofail(saved_stdin);
506
507         if (fd >= 0)
508                 close_nointr_nofail(fd);
509
510         return r;
511 }
512
513 static int write_confirm_message(const char *format, ...) {
514         int fd;
515         va_list ap;
516
517         assert(format);
518
519         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
520         if (fd < 0)
521                 return fd;
522
523         va_start(ap, format);
524         vdprintf(fd, format, ap);
525         va_end(ap);
526
527         close_nointr_nofail(fd);
528
529         return 0;
530 }
531
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back onto stdin/stdout and closes the saved copies.
 * Saved descriptors that are negative are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        /* Restore both streams first ... */
        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... then get rid of the backup copies */
        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
558
/* Asks on the console whether the command line in argv shall be executed,
 * offering the answers [y]es, [n]o and [s]kip.
 *
 * response is passed on to ask(), which presumably stores the chosen
 * character there (its definition is not in this file).
 *
 * Returns the result of ask(), or a negative error if the console could
 * not be set up or the command line could not be formatted. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdin/stdout already point at the console here, so they
                 * must be restored on this path as well */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
578
579 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
580         bool keep_groups = false;
581         int r;
582
583         assert(context);
584
585         /* Lookup and set GID and supplementary group list. Here too
586          * we avoid NSS lookups for gid=0. */
587
588         if (context->group || username) {
589
590                 if (context->group) {
591                         const char *g = context->group;
592
593                         if ((r = get_group_creds(&g, &gid)) < 0)
594                                 return r;
595                 }
596
597                 /* First step, initialize groups from /etc/groups */
598                 if (username && gid != 0) {
599                         if (initgroups(username, gid) < 0)
600                                 return -errno;
601
602                         keep_groups = true;
603                 }
604
605                 /* Second step, set our gids */
606                 if (setresgid(gid, gid, gid) < 0)
607                         return -errno;
608         }
609
610         if (context->supplementary_groups) {
611                 int ngroups_max, k;
612                 gid_t *gids;
613                 char **i;
614
615                 /* Final step, initialize any manually set supplementary groups */
616                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
617
618                 if (!(gids = new(gid_t, ngroups_max)))
619                         return -ENOMEM;
620
621                 if (keep_groups) {
622                         if ((k = getgroups(ngroups_max, gids)) < 0) {
623                                 free(gids);
624                                 return -errno;
625                         }
626                 } else
627                         k = 0;
628
629                 STRV_FOREACH(i, context->supplementary_groups) {
630                         const char *g;
631
632                         if (k >= ngroups_max) {
633                                 free(gids);
634                                 return -E2BIG;
635                         }
636
637                         g = *i;
638                         r = get_group_creds(&g, gids+k);
639                         if (r < 0) {
640                                 free(gids);
641                                 return r;
642                         }
643
644                         k++;
645                 }
646
647                 if (setgroups(k, gids) < 0) {
648                         free(gids);
649                         return -errno;
650                 }
651
652                 free(gids);
653         }
654
655         return 0;
656 }
657
658 static int enforce_user(const ExecContext *context, uid_t uid) {
659         int r;
660         assert(context);
661
662         /* Sets (but doesn't lookup) the uid and make sure we keep the
663          * capabilities while doing so. */
664
665         if (context->capabilities) {
666                 cap_t d;
667                 static const cap_value_t bits[] = {
668                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
669                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
670                 };
671
672                 /* First step: If we need to keep capabilities but
673                  * drop privileges we need to make sure we keep our
674                  * caps, whiel we drop privileges. */
675                 if (uid != 0) {
676                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
677
678                         if (prctl(PR_GET_SECUREBITS) != sb)
679                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
680                                         return -errno;
681                 }
682
683                 /* Second step: set the capabilities. This will reduce
684                  * the capabilities to the minimum we need. */
685
686                 if (!(d = cap_dup(context->capabilities)))
687                         return -errno;
688
689                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
690                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
691                         r = -errno;
692                         cap_free(d);
693                         return r;
694                 }
695
696                 if (cap_set_proc(d) < 0) {
697                         r = -errno;
698                         cap_free(d);
699                         return r;
700                 }
701
702                 cap_free(d);
703         }
704
705         /* Third step: actually set the uids */
706         if (setresuid(uid, uid, uid) < 0)
707                 return -errno;
708
709         /* At this point we should have all necessary capabilities but
710            are otherwise a normal user. However, the caps might got
711            corrupted due to the setresuid() so we need clean them up
712            later. This is done outside of this call. */
713
714         return 0;
715 }
716
717 #ifdef HAVE_PAM
718
719 static int null_conv(
720                 int num_msg,
721                 const struct pam_message **msg,
722                 struct pam_response **resp,
723                 void *appdata_ptr) {
724
725         /* We don't support conversations */
726
727         return PAM_CONV_ERR;
728 }
729
730 static int setup_pam(
731                 const char *name,
732                 const char *user,
733                 uid_t uid,
734                 const char *tty,
735                 char ***pam_env,
736                 int fds[], unsigned n_fds) {
737
738         static const struct pam_conv conv = {
739                 .conv = null_conv,
740                 .appdata_ptr = NULL
741         };
742
743         pam_handle_t *handle = NULL;
744         sigset_t ss, old_ss;
745         int pam_code = PAM_SUCCESS;
746         int err;
747         char **e = NULL;
748         bool close_session = false;
749         pid_t pam_pid = 0, parent_pid;
750
751         assert(name);
752         assert(user);
753         assert(pam_env);
754
755         /* We set up PAM in the parent process, then fork. The child
756          * will then stay around until killed via PR_GET_PDEATHSIG or
757          * systemd via the cgroup logic. It will then remove the PAM
758          * session again. The parent process will exec() the actual
759          * daemon. We do things this way to ensure that the main PID
760          * of the daemon is the one we initially fork()ed. */
761
762         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
763                 handle = NULL;
764                 goto fail;
765         }
766
767         if (tty)
768                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
769                         goto fail;
770
771         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
772                 goto fail;
773
774         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
775                 goto fail;
776
777         close_session = true;
778
779         if ((!(e = pam_getenvlist(handle)))) {
780                 pam_code = PAM_BUF_ERR;
781                 goto fail;
782         }
783
784         /* Block SIGTERM, so that we know that it won't get lost in
785          * the child */
786         if (sigemptyset(&ss) < 0 ||
787             sigaddset(&ss, SIGTERM) < 0 ||
788             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
789                 goto fail;
790
791         parent_pid = getpid();
792
793         if ((pam_pid = fork()) < 0)
794                 goto fail;
795
796         if (pam_pid == 0) {
797                 int sig;
798                 int r = EXIT_PAM;
799
800                 /* The child's job is to reset the PAM session on
801                  * termination */
802
803                 /* This string must fit in 10 chars (i.e. the length
804                  * of "/sbin/init"), to look pretty in /bin/ps */
805                 rename_process("(sd-pam)");
806
807                 /* Make sure we don't keep open the passed fds in this
808                 child. We assume that otherwise only those fds are
809                 open here that have been opened by PAM. */
810                 close_many(fds, n_fds);
811
812                 /* Drop privileges - we don't need any to pam_close_session
813                  * and this will make PR_SET_PDEATHSIG work in most cases.
814                  * If this fails, ignore the error - but expect sd-pam threads
815                  * to fail to exit normally */
816                 if (setresuid(uid, uid, uid) < 0)
817                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
818
819                 /* Wait until our parent died. This will only work if
820                  * the above setresuid() succeeds, otherwise the kernel
821                  * will not allow unprivileged parents kill their privileged
822                  * children this way. We rely on the control groups kill logic
823                  * to do the rest for us. */
824                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
825                         goto child_finish;
826
827                 /* Check if our parent process might already have
828                  * died? */
829                 if (getppid() == parent_pid) {
830                         for (;;) {
831                                 if (sigwait(&ss, &sig) < 0) {
832                                         if (errno == EINTR)
833                                                 continue;
834
835                                         goto child_finish;
836                                 }
837
838                                 assert(sig == SIGTERM);
839                                 break;
840                         }
841                 }
842
843                 /* If our parent died we'll end the session */
844                 if (getppid() != parent_pid)
845                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
846                                 goto child_finish;
847
848                 r = 0;
849
850         child_finish:
851                 pam_end(handle, pam_code | PAM_DATA_SILENT);
852                 _exit(r);
853         }
854
855         /* If the child was forked off successfully it will do all the
856          * cleanups, so forget about the handle here. */
857         handle = NULL;
858
859         /* Unblock SIGTERM again in the parent */
860         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
861                 goto fail;
862
863         /* We close the log explicitly here, since the PAM modules
864          * might have opened it, but we don't want this fd around. */
865         closelog();
866
867         *pam_env = e;
868         e = NULL;
869
870         return 0;
871
872 fail:
873         if (pam_code != PAM_SUCCESS)
874                 err = -EPERM;  /* PAM errors do not map to errno */
875         else
876                 err = -errno;
877
878         if (handle) {
879                 if (close_session)
880                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
881
882                 pam_end(handle, pam_code | PAM_DATA_SILENT);
883         }
884
885         strv_free(e);
886
887         closelog();
888
889         if (pam_pid > 1) {
890                 kill(pam_pid, SIGTERM);
891                 kill(pam_pid, SIGCONT);
892         }
893
894         return err;
895 }
896 #endif
897
/* Renames the current process to "(<name>)", where <name> is the file name
 * component of path cut down to its last 8 characters. With an empty file
 * name the process is called "(...)" instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 name characters + ')' + NUL. The result must fit in
         * 10 chars (the length of "/sbin/init") to look pretty in /bin/ps */
        char buf[11];
        char *out = buf;
        const char *name;
        size_t len;

        name = path_get_file_name(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: the beginning is often just a common
                 * prefix such as "systemd-" and thus less telling */
                name += len - 8;
                len = 8;
        }

        *out++ = '(';
        memcpy(out, name, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(buf);
}
928
929 static int apply_seccomp(uint32_t *syscall_filter) {
930         static const struct sock_filter header[] = {
931                 VALIDATE_ARCHITECTURE,
932                 EXAMINE_SYSCALL
933         };
934         static const struct sock_filter footer[] = {
935                 _KILL_PROCESS
936         };
937
938         int i;
939         unsigned n;
940         struct sock_filter *f;
941         struct sock_fprog prog;
942
943         assert(syscall_filter);
944
945         /* First: count the syscalls to check for */
946         for (i = 0, n = 0; i < syscall_max(); i++)
947                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
948                         n++;
949
950         /* Second: build the filter program from a header the syscall
951          * matches and the footer */
952         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
953         memcpy(f, header, sizeof(header));
954
955         for (i = 0, n = 0; i < syscall_max(); i++)
956                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
957                         struct sock_filter item[] = {
958                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
959                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
960                         };
961
962                         assert_cc(ELEMENTSOF(item) == 2);
963
964                         f[ELEMENTSOF(header) + 2*n]  = item[0];
965                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
966
967                         n++;
968                 }
969
970         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
971
972         /* Third: install the filter */
973         zero(prog);
974         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
975         prog.filter = f;
976         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
977                 return -errno;
978
979         return 0;
980 }
981
/* Forks off a child process which applies the settings of 'context'
 * and then calls execve() on command->path.
 *
 * command:            the command to run; its exec_status is updated
 *                     in the parent after the fork.
 * argv:               argument vector; if NULL, command->argv is used.
 * context:            execution settings applied in the child.
 * fds, n_fds:         file descriptors kept open for the child. If
 *                     stdin, stdout or stderr is configured as a
 *                     socket, exactly one fd must be passed (otherwise
 *                     -EINVAL is returned) and it is used as that
 *                     socket instead.
 * environment:        merged into the final environment together with
 *                     the variables generated here,
 *                     context->environment, the loaded environment
 *                     files and the PAM environment.
 * apply_permissions:  gates group enforcement, PAM, resource limits,
 *                     capabilities, the user switch, secure bits,
 *                     no_new_privileges and the syscall filter.
 * apply_chroot:       if true, chroot() into context->root_directory
 *                     (when set) and chdir() into the working
 *                     directory; if false only chdir() into the
 *                     concatenation of both.
 * apply_tty_stdin:    passed on to setup_input(), setup_output() and
 *                     setup_error().
 * confirm_spawn:      ask for confirmation in the child before
 *                     continuing.
 * cgroup_bondings, cgroup_attributes, cgroup_suffix:
 *                     cgroups are realized and attributes applied in
 *                     the parent before forking; the process is added
 *                     to the cgroups from both child and parent.
 * unit_id:            passed on to setup_output() and setup_error().
 * idle_pipe:          if non-NULL, the child closes idle_pipe[1] and
 *                     waits up to IDLE_TIMEOUT_USEC for POLLHUP on
 *                     idle_pipe[0] before continuing.
 * ret:                receives the PID of the child on success.
 *
 * Returns 0 in the parent on success, a negative errno-style value if
 * something fails up to and including fork(). The child never returns
 * from this function: it either execve()s or calls _exit() with one of
 * the EXIT_* codes. */
int exec_spawn(ExecCommand *command,
               char **argv,
               const ExecContext *context,
               int fds[], unsigned n_fds,
               char **environment,
               bool apply_permissions,
               bool apply_chroot,
               bool apply_tty_stdin,
               bool confirm_spawn,
               CGroupBonding *cgroup_bondings,
               CGroupAttribute *cgroup_attributes,
               const char *cgroup_suffix,
               const char *unit_id,
               int idle_pipe[2],
               pid_t *ret) {

        pid_t pid;
        int r;
        char *line;
        int socket_fd;
        char _cleanup_strv_free_ **files_env = NULL;

        assert(command);
        assert(context);
        assert(ret);
        assert(fds || n_fds <= 0);

        /* In socket mode the single fd passed in becomes socket_fd
         * and the fd list is treated as empty from here on */
        if (context->std_input == EXEC_INPUT_SOCKET ||
            context->std_output == EXEC_OUTPUT_SOCKET ||
            context->std_error == EXEC_OUTPUT_SOCKET) {

                if (n_fds != 1)
                        return -EINVAL;

                socket_fd = fds[0];

                fds = NULL;
                n_fds = 0;
        } else
                socket_fd = -1;

        /* The environment files are loaded in the parent, so that a
         * failure is reported to the caller instead of the child */
        r = exec_context_load_environment(context, &files_env);
        if (r < 0) {
                log_error("Failed to load environment files: %s", strerror(-r));
                return r;
        }

        if (!argv)
                argv = command->argv;

        line = exec_command_line(argv);
        if (!line)
                return log_oom();

        log_debug("About to execute: %s", line);
        free(line);

        r = cgroup_bonding_realize_list(cgroup_bondings);
        if (r < 0)
                return r;

        cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);

        pid = fork();
        if (pid < 0)
                return -errno;

        if (pid == 0) {
                /* On failure, 'r' carries the exit code for _exit()
                 * and 'err' the negative errno used for logging */
                int i, err;
                sigset_t ss;
                const char *username = NULL, *home = NULL;
                uid_t uid = (uid_t) -1;
                gid_t gid = (gid_t) -1;
                char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
                        **final_env = NULL, **final_argv = NULL;
                unsigned n_env = 0;
                bool set_access = false;

                /* child */

                rename_process_from_path(command->path);

                /* We reset exactly these signals, since they are the
                 * only ones we set to SIG_IGN in the main daemon. All
                 * others we leave untouched because we set them to
                 * SIG_DFL or a valid handler initially, both of which
                 * will be demoted to SIG_DFL. */
                default_signals(SIGNALS_CRASH_HANDLER,
                                SIGNALS_IGNORE, -1);

                if (context->ignore_sigpipe)
                        ignore_signals(SIGPIPE, -1);

                /* Start out with an empty signal mask */
                assert_se(sigemptyset(&ss) == 0);
                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                        err = -errno;
                        r = EXIT_SIGNAL_MASK;
                        goto fail_child;
                }

                if (idle_pipe) {
                        if (idle_pipe[1] >= 0)
                                close_nointr_nofail(idle_pipe[1]);
                        if (idle_pipe[0] >= 0) {
                                fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
                                close_nointr_nofail(idle_pipe[0]);
                        }
                }

                /* Close sockets very early to make sure we don't
                 * block init reexecution because it cannot bind its
                 * sockets */
                log_forget_fds();
                err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
                                           socket_fd >= 0 ? 1 : n_fds);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (!context->same_pgrp)
                        if (setsid() < 0) {
                                err = -errno;
                                r = EXIT_SETSID;
                                goto fail_child;
                        }

                if (context->tcpwrap_name) {
                        if (socket_fd >= 0)
                                if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }

                        for (i = 0; i < (int) n_fds; i++) {
                                if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }
                        }
                }

                exec_context_tty_reset(context);

                if (confirm_spawn) {
                        char response;

                        err = ask_for_confirmation(&response, argv);
                        if (err == -ETIMEDOUT)
                                write_confirm_message("Confirmation question timed out, assuming positive response.\n");
                        else if (err < 0)
                                write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
                        else if (response == 's') {
                                write_confirm_message("Skipping execution.\n");
                                err = -ECANCELED;
                                r = EXIT_CONFIRM;
                                goto fail_child;
                        } else if (response == 'n') {
                                write_confirm_message("Failing execution.\n");
                                /* With r == 0 nothing is logged below
                                 * and the child exits with status 0 */
                                err = r = 0;
                                goto fail_child;
                        }
                }

                /* If a socket is connected to STDIN/STDOUT/STDERR, we
                 * must be sure to drop O_NONBLOCK */
                if (socket_fd >= 0)
                        fd_nonblock(socket_fd, false);

                err = setup_input(context, socket_fd, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDIN;
                        goto fail_child;
                }

                err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDOUT;
                        goto fail_child;
                }

                err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDERR;
                        goto fail_child;
                }

                if (cgroup_bondings) {
                        err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (context->oom_score_adjust_set) {
                        char t[16];

                        snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
                        char_array_0(t);

                        if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
                                err = -errno;
                                r = EXIT_OOM_ADJUST;
                                goto fail_child;
                        }
                }

                if (context->nice_set)
                        if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
                                err = -errno;
                                r = EXIT_NICE;
                                goto fail_child;
                        }

                if (context->cpu_sched_set) {
                        struct sched_param param;

                        zero(param);
                        param.sched_priority = context->cpu_sched_priority;

                        if (sched_setscheduler(0, context->cpu_sched_policy |
                                               (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
                                err = -errno;
                                r = EXIT_SETSCHEDULER;
                                goto fail_child;
                        }
                }

                if (context->cpuset)
                        if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
                                err = -errno;
                                r = EXIT_CPUAFFINITY;
                                goto fail_child;
                        }

                if (context->ioprio_set)
                        if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
                                err = -errno;
                                r = EXIT_IOPRIO;
                                goto fail_child;
                        }

                /* (nsec_t) -1 means that no timer slack was configured */
                if (context->timer_slack_nsec != (nsec_t) -1)
                        if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
                                err = -errno;
                                r = EXIT_TIMERSLACK;
                                goto fail_child;
                        }

                if (context->utmp_id)
                        utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);

                /* Resolve the configured user; the credentials are
                 * only applied further down */
                if (context->user) {
                        username = context->user;
                        err = get_user_creds(&username, &uid, &gid, &home, NULL);
                        if (err < 0) {
                                r = EXIT_USER;
                                goto fail_child;
                        }

                        if (is_terminal_input(context->std_input)) {
                                err = chown_terminal(STDIN_FILENO, uid);
                                if (err < 0) {
                                        r = EXIT_STDIN;
                                        goto fail_child;
                                }
                        }

                        if (cgroup_bondings && context->control_group_modify) {
                                err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
                                if (err >= 0)
                                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
                                if (err < 0) {
                                        r = EXIT_CGROUP;
                                        goto fail_child;
                                }

                                set_access = true;
                        }
                }

                /* If the task access was not set up above, still pass
                 * on an explicitly configured persistence setting,
                 * leaving mode and ownership unspecified (-1) */
                if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (apply_permissions) {
                        err = enforce_groups(context, username, gid);
                        if (err < 0) {
                                r = EXIT_GROUP;
                                goto fail_child;
                        }
                }

                umask(context->umask);

#ifdef HAVE_PAM
                if (apply_permissions && context->pam_name && username) {
                        err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
                        if (err < 0) {
                                r = EXIT_PAM;
                                goto fail_child;
                        }
                }
#endif
                if (context->private_network) {
                        if (unshare(CLONE_NEWNET) < 0) {
                                err = -errno;
                                r = EXIT_NETWORK;
                                goto fail_child;
                        }

                        loopback_setup();
                }

                /* Only set up a namespace if any setting asks for it */
                if (strv_length(context->read_write_dirs) > 0 ||
                    strv_length(context->read_only_dirs) > 0 ||
                    strv_length(context->inaccessible_dirs) > 0 ||
                    context->mount_flags != 0 ||
                    context->private_tmp) {
                        err = setup_namespace(context->read_write_dirs,
                                              context->read_only_dirs,
                                              context->inaccessible_dirs,
                                              context->private_tmp,
                                              context->mount_flags);
                        if (err < 0) {
                                r = EXIT_NAMESPACE;
                                goto fail_child;
                        }
                }

                if (apply_chroot) {
                        if (context->root_directory)
                                if (chroot(context->root_directory) < 0) {
                                        err = -errno;
                                        r = EXIT_CHROOT;
                                        goto fail_child;
                                }

                        if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                } else {
                        /* Without chroot() the working directory is
                         * looked up below the root directory */
                        char _cleanup_free_ *d = NULL;

                        if (asprintf(&d, "%s/%s",
                                     context->root_directory ? context->root_directory : "",
                                     context->working_directory ? context->working_directory : "") < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                        if (chdir(d) < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                }

                /* We repeat the fd closing here, to make sure that
                 * nothing is leaked from the PAM modules */
                err = close_all_fds(fds, n_fds);
                if (err >= 0)
                        err = shift_fds(fds, n_fds);
                if (err >= 0)
                        err = flags_fds(fds, n_fds, context->non_blocking);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (apply_permissions) {

                        for (i = 0; i < RLIMIT_NLIMITS; i++) {
                                if (!context->rlimit[i])
                                        continue;

                                if (setrlimit_closest(i, context->rlimit[i]) < 0) {
                                        err = -errno;
                                        r = EXIT_LIMITS;
                                        goto fail_child;
                                }
                        }

                        if (context->capability_bounding_set_drop) {
                                err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
                                if (err < 0) {
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }
                        }

                        if (context->user) {
                                err = enforce_user(context, uid);
                                if (err < 0) {
                                        r = EXIT_USER;
                                        goto fail_child;
                                }
                        }

                        /* PR_GET_SECUREBITS is not privileged, while
                         * PR_SET_SECUREBITS is. So to suppress
                         * potential EPERMs we'll try not to call
                         * PR_SET_SECUREBITS unless necessary. */
                        if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
                                if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
                                        err = -errno;
                                        r = EXIT_SECUREBITS;
                                        goto fail_child;
                                }

                        if (context->capabilities)
                                if (cap_set_proc(context->capabilities) < 0) {
                                        err = -errno;
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }

                        if (context->no_new_privileges)
                                if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                        err = -errno;
                                        r = EXIT_NO_NEW_PRIVILEGES;
                                        goto fail_child;
                                }

                        if (context->syscall_filter) {
                                err = apply_seccomp(context->syscall_filter);
                                if (err < 0) {
                                        r = EXIT_SECCOMP;
                                        goto fail_child;
                                }
                        }
                }

                /* Room for up to six generated entries (LISTEN_PID,
                 * LISTEN_FDS, HOME, LOGNAME, USER and the string from
                 * default_term_for_tty()) plus the terminating NULL */
                if (!(our_env = new0(char*, 7))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (n_fds > 0)
                        if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
                            asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (home)
                        if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (username)
                        if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
                            asprintf(our_env + n_env++, "USER=%s", username) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (is_terminal_input(context->std_input) ||
                    context->std_output == EXEC_OUTPUT_TTY ||
                    context->std_error == EXEC_OUTPUT_TTY)
                        if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                assert(n_env <= 7);

                if (!(final_env = strv_env_merge(
                                      5,
                                      environment,
                                      our_env,
                                      context->environment,
                                      files_env,
                                      pam_env,
                                      NULL))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (!(final_argv = replace_env_argv(argv, final_env))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                final_env = strv_env_clean(final_env);

                /* execve() only returns on failure */
                execve(command->path, final_argv, final_env);
                err = -errno;
                r = EXIT_EXEC;

        fail_child:
                if (r != 0) {
                        log_open();
                        log_warning("Failed at step %s spawning %s: %s",
                                    exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
                                    command->path, strerror(-err));
                }

                _exit(r);
        }

        /* We add the new process to the cgroup both in the child (so
         * that we can be sure that no user code is ever executed
         * outside of the cgroup) and in the parent (so that we can be
         * sure that when we kill the cgroup the process will be
         * killed too). */
        if (cgroup_bondings)
                cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);

        log_debug("Forked %s as %lu", command->path, (unsigned long) pid);

        exec_status_start(&command->exec_status, pid);

        *ret = pid;
        return 0;
}
1516
1517 void exec_context_init(ExecContext *c) {
1518         assert(c);
1519
1520         c->umask = 0022;
1521         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1522         c->cpu_sched_policy = SCHED_OTHER;
1523         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1524         c->syslog_level_prefix = true;
1525         c->control_group_persistent = -1;
1526         c->ignore_sigpipe = true;
1527         c->timer_slack_nsec = (nsec_t) -1;
1528 }
1529
1530 void exec_context_done(ExecContext *c) {
1531         unsigned l;
1532
1533         assert(c);
1534
1535         strv_free(c->environment);
1536         c->environment = NULL;
1537
1538         strv_free(c->environment_files);
1539         c->environment_files = NULL;
1540
1541         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1542                 free(c->rlimit[l]);
1543                 c->rlimit[l] = NULL;
1544         }
1545
1546         free(c->working_directory);
1547         c->working_directory = NULL;
1548         free(c->root_directory);
1549         c->root_directory = NULL;
1550
1551         free(c->tty_path);
1552         c->tty_path = NULL;
1553
1554         free(c->tcpwrap_name);
1555         c->tcpwrap_name = NULL;
1556
1557         free(c->syslog_identifier);
1558         c->syslog_identifier = NULL;
1559
1560         free(c->user);
1561         c->user = NULL;
1562
1563         free(c->group);
1564         c->group = NULL;
1565
1566         strv_free(c->supplementary_groups);
1567         c->supplementary_groups = NULL;
1568
1569         free(c->pam_name);
1570         c->pam_name = NULL;
1571
1572         if (c->capabilities) {
1573                 cap_free(c->capabilities);
1574                 c->capabilities = NULL;
1575         }
1576
1577         strv_free(c->read_only_dirs);
1578         c->read_only_dirs = NULL;
1579
1580         strv_free(c->read_write_dirs);
1581         c->read_write_dirs = NULL;
1582
1583         strv_free(c->inaccessible_dirs);
1584         c->inaccessible_dirs = NULL;
1585
1586         if (c->cpuset)
1587                 CPU_FREE(c->cpuset);
1588
1589         free(c->utmp_id);
1590         c->utmp_id = NULL;
1591
1592         free(c->syscall_filter);
1593         c->syscall_filter = NULL;
1594 }
1595
1596 void exec_command_done(ExecCommand *c) {
1597         assert(c);
1598
1599         free(c->path);
1600         c->path = NULL;
1601
1602         strv_free(c->argv);
1603         c->argv = NULL;
1604 }
1605
1606 void exec_command_done_array(ExecCommand *c, unsigned n) {
1607         unsigned i;
1608
1609         for (i = 0; i < n; i++)
1610                 exec_command_done(c+i);
1611 }
1612
1613 void exec_command_free_list(ExecCommand *c) {
1614         ExecCommand *i;
1615
1616         while ((i = c)) {
1617                 LIST_REMOVE(ExecCommand, command, c, i);
1618                 exec_command_done(i);
1619                 free(i);
1620         }
1621 }
1622
1623 void exec_command_free_array(ExecCommand **c, unsigned n) {
1624         unsigned i;
1625
1626         for (i = 0; i < n; i++) {
1627                 exec_command_free_list(c[i]);
1628                 c[i] = NULL;
1629         }
1630 }
1631
1632 int exec_context_load_environment(const ExecContext *c, char ***l) {
1633         char **i, **r = NULL;
1634
1635         assert(c);
1636         assert(l);
1637
1638         STRV_FOREACH(i, c->environment_files) {
1639                 char *fn;
1640                 int k;
1641                 bool ignore = false;
1642                 char **p;
1643
1644                 fn = *i;
1645
1646                 if (fn[0] == '-') {
1647                         ignore = true;
1648                         fn ++;
1649                 }
1650
1651                 if (!path_is_absolute(fn)) {
1652
1653                         if (ignore)
1654                                 continue;
1655
1656                         strv_free(r);
1657                         return -EINVAL;
1658                 }
1659
1660                 if ((k = load_env_file(fn, &p)) < 0) {
1661
1662                         if (ignore)
1663                                 continue;
1664
1665                         strv_free(r);
1666                         return k;
1667                 }
1668
1669                 if (r == NULL)
1670                         r = p;
1671                 else {
1672                         char **m;
1673
1674                         m = strv_env_merge(2, r, p);
1675                         strv_free(r);
1676                         strv_free(p);
1677
1678                         if (!m)
1679                                 return -ENOMEM;
1680
1681                         r = m;
1682                 }
1683         }
1684
1685         *l = r;
1686
1687         return 0;
1688 }
1689
1690 static void strv_fprintf(FILE *f, char **l) {
1691         char **g;
1692
1693         assert(f);
1694
1695         STRV_FOREACH(g, l)
1696                 fprintf(f, " %s", *g);
1697 }
1698
1699 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1700         char ** e;
1701         unsigned i;
1702
1703         assert(c);
1704         assert(f);
1705
1706         if (!prefix)
1707                 prefix = "";
1708
1709         fprintf(f,
1710                 "%sUMask: %04o\n"
1711                 "%sWorkingDirectory: %s\n"
1712                 "%sRootDirectory: %s\n"
1713                 "%sNonBlocking: %s\n"
1714                 "%sPrivateTmp: %s\n"
1715                 "%sControlGroupModify: %s\n"
1716                 "%sControlGroupPersistent: %s\n"
1717                 "%sPrivateNetwork: %s\n"
1718                 "%sIgnoreSIGPIPE: %s\n",
1719                 prefix, c->umask,
1720                 prefix, c->working_directory ? c->working_directory : "/",
1721                 prefix, c->root_directory ? c->root_directory : "/",
1722                 prefix, yes_no(c->non_blocking),
1723                 prefix, yes_no(c->private_tmp),
1724                 prefix, yes_no(c->control_group_modify),
1725                 prefix, yes_no(c->control_group_persistent),
1726                 prefix, yes_no(c->private_network),
1727                 prefix, yes_no(c->ignore_sigpipe));
1728
1729         STRV_FOREACH(e, c->environment)
1730                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1731
1732         STRV_FOREACH(e, c->environment_files)
1733                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1734
1735         if (c->tcpwrap_name)
1736                 fprintf(f,
1737                         "%sTCPWrapName: %s\n",
1738                         prefix, c->tcpwrap_name);
1739
1740         if (c->nice_set)
1741                 fprintf(f,
1742                         "%sNice: %i\n",
1743                         prefix, c->nice);
1744
1745         if (c->oom_score_adjust_set)
1746                 fprintf(f,
1747                         "%sOOMScoreAdjust: %i\n",
1748                         prefix, c->oom_score_adjust);
1749
1750         for (i = 0; i < RLIM_NLIMITS; i++)
1751                 if (c->rlimit[i])
1752                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1753
1754         if (c->ioprio_set)
1755                 fprintf(f,
1756                         "%sIOSchedulingClass: %s\n"
1757                         "%sIOPriority: %i\n",
1758                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1759                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1760
1761         if (c->cpu_sched_set)
1762                 fprintf(f,
1763                         "%sCPUSchedulingPolicy: %s\n"
1764                         "%sCPUSchedulingPriority: %i\n"
1765                         "%sCPUSchedulingResetOnFork: %s\n",
1766                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1767                         prefix, c->cpu_sched_priority,
1768                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1769
1770         if (c->cpuset) {
1771                 fprintf(f, "%sCPUAffinity:", prefix);
1772                 for (i = 0; i < c->cpuset_ncpus; i++)
1773                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1774                                 fprintf(f, " %i", i);
1775                 fputs("\n", f);
1776         }
1777
1778         if (c->timer_slack_nsec != (nsec_t) -1)
1779                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1780
1781         fprintf(f,
1782                 "%sStandardInput: %s\n"
1783                 "%sStandardOutput: %s\n"
1784                 "%sStandardError: %s\n",
1785                 prefix, exec_input_to_string(c->std_input),
1786                 prefix, exec_output_to_string(c->std_output),
1787                 prefix, exec_output_to_string(c->std_error));
1788
1789         if (c->tty_path)
1790                 fprintf(f,
1791                         "%sTTYPath: %s\n"
1792                         "%sTTYReset: %s\n"
1793                         "%sTTYVHangup: %s\n"
1794                         "%sTTYVTDisallocate: %s\n",
1795                         prefix, c->tty_path,
1796                         prefix, yes_no(c->tty_reset),
1797                         prefix, yes_no(c->tty_vhangup),
1798                         prefix, yes_no(c->tty_vt_disallocate));
1799
1800         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1801             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1802             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1803             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1804                 fprintf(f,
1805                         "%sSyslogFacility: %s\n"
1806                         "%sSyslogLevel: %s\n",
1807                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1808                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1809
1810         if (c->capabilities) {
1811                 char *t;
1812                 if ((t = cap_to_text(c->capabilities, NULL))) {
1813                         fprintf(f, "%sCapabilities: %s\n",
1814                                 prefix, t);
1815                         cap_free(t);
1816                 }
1817         }
1818
1819         if (c->secure_bits)
1820                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1821                         prefix,
1822                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1823                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1824                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1825                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1826                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1827                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1828
1829         if (c->capability_bounding_set_drop) {
1830                 unsigned long l;
1831                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1832
1833                 for (l = 0; l <= cap_last_cap(); l++)
1834                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1835                                 char *t;
1836
1837                                 if ((t = cap_to_name(l))) {
1838                                         fprintf(f, " %s", t);
1839                                         cap_free(t);
1840                                 }
1841                         }
1842
1843                 fputs("\n", f);
1844         }
1845
1846         if (c->user)
1847                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1848         if (c->group)
1849                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1850
1851         if (strv_length(c->supplementary_groups) > 0) {
1852                 fprintf(f, "%sSupplementaryGroups:", prefix);
1853                 strv_fprintf(f, c->supplementary_groups);
1854                 fputs("\n", f);
1855         }
1856
1857         if (c->pam_name)
1858                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1859
1860         if (strv_length(c->read_write_dirs) > 0) {
1861                 fprintf(f, "%sReadWriteDirs:", prefix);
1862                 strv_fprintf(f, c->read_write_dirs);
1863                 fputs("\n", f);
1864         }
1865
1866         if (strv_length(c->read_only_dirs) > 0) {
1867                 fprintf(f, "%sReadOnlyDirs:", prefix);
1868                 strv_fprintf(f, c->read_only_dirs);
1869                 fputs("\n", f);
1870         }
1871
1872         if (strv_length(c->inaccessible_dirs) > 0) {
1873                 fprintf(f, "%sInaccessibleDirs:", prefix);
1874                 strv_fprintf(f, c->inaccessible_dirs);
1875                 fputs("\n", f);
1876         }
1877
1878         if (c->utmp_id)
1879                 fprintf(f,
1880                         "%sUtmpIdentifier: %s\n",
1881                         prefix, c->utmp_id);
1882 }
1883
1884 void exec_status_start(ExecStatus *s, pid_t pid) {
1885         assert(s);
1886
1887         zero(*s);
1888         s->pid = pid;
1889         dual_timestamp_get(&s->start_timestamp);
1890 }
1891
1892 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1893         assert(s);
1894
1895         if (s->pid && s->pid != pid)
1896                 zero(*s);
1897
1898         s->pid = pid;
1899         dual_timestamp_get(&s->exit_timestamp);
1900
1901         s->code = code;
1902         s->status = status;
1903
1904         if (context) {
1905                 if (context->utmp_id)
1906                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1907
1908                 exec_context_tty_reset(context);
1909         }
1910 }
1911
1912 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1913         char buf[FORMAT_TIMESTAMP_MAX];
1914
1915         assert(s);
1916         assert(f);
1917
1918         if (!prefix)
1919                 prefix = "";
1920
1921         if (s->pid <= 0)
1922                 return;
1923
1924         fprintf(f,
1925                 "%sPID: %lu\n",
1926                 prefix, (unsigned long) s->pid);
1927
1928         if (s->start_timestamp.realtime > 0)
1929                 fprintf(f,
1930                         "%sStart Timestamp: %s\n",
1931                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1932
1933         if (s->exit_timestamp.realtime > 0)
1934                 fprintf(f,
1935                         "%sExit Timestamp: %s\n"
1936                         "%sExit Code: %s\n"
1937                         "%sExit Status: %i\n",
1938                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1939                         prefix, sigchld_code_to_string(s->code),
1940                         prefix, s->status);
1941 }
1942
1943 char *exec_command_line(char **argv) {
1944         size_t k;
1945         char *n, *p, **a;
1946         bool first = true;
1947
1948         assert(argv);
1949
1950         k = 1;
1951         STRV_FOREACH(a, argv)
1952                 k += strlen(*a)+3;
1953
1954         if (!(n = new(char, k)))
1955                 return NULL;
1956
1957         p = n;
1958         STRV_FOREACH(a, argv) {
1959
1960                 if (!first)
1961                         *(p++) = ' ';
1962                 else
1963                         first = false;
1964
1965                 if (strpbrk(*a, WHITESPACE)) {
1966                         *(p++) = '\'';
1967                         p = stpcpy(p, *a);
1968                         *(p++) = '\'';
1969                 } else
1970                         p = stpcpy(p, *a);
1971
1972         }
1973
1974         *p = 0;
1975
1976         /* FIXME: this doesn't really handle arguments that have
1977          * spaces and ticks in them */
1978
1979         return n;
1980 }
1981
1982 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1983         char *p2;
1984         const char *prefix2;
1985
1986         char *cmd;
1987
1988         assert(c);
1989         assert(f);
1990
1991         if (!prefix)
1992                 prefix = "";
1993         p2 = strappend(prefix, "\t");
1994         prefix2 = p2 ? p2 : prefix;
1995
1996         cmd = exec_command_line(c->argv);
1997
1998         fprintf(f,
1999                 "%sCommand Line: %s\n",
2000                 prefix, cmd ? cmd : strerror(ENOMEM));
2001
2002         free(cmd);
2003
2004         exec_status_dump(&c->exec_status, f, prefix2);
2005
2006         free(p2);
2007 }
2008
2009 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2010         assert(f);
2011
2012         if (!prefix)
2013                 prefix = "";
2014
2015         LIST_FOREACH(command, c, c)
2016                 exec_command_dump(c, f, prefix);
2017 }
2018
2019 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2020         ExecCommand *end;
2021
2022         assert(l);
2023         assert(e);
2024
2025         if (*l) {
2026                 /* It's kind of important, that we keep the order here */
2027                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2028                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2029         } else
2030               *l = e;
2031 }
2032
2033 int exec_command_set(ExecCommand *c, const char *path, ...) {
2034         va_list ap;
2035         char **l, *p;
2036
2037         assert(c);
2038         assert(path);
2039
2040         va_start(ap, path);
2041         l = strv_new_ap(path, ap);
2042         va_end(ap);
2043
2044         if (!l)
2045                 return -ENOMEM;
2046
2047         if (!(p = strdup(path))) {
2048                 strv_free(l);
2049                 return -ENOMEM;
2050         }
2051
2052         free(c->path);
2053         c->path = p;
2054
2055         strv_free(c->argv);
2056         c->argv = l;
2057
2058         return 0;
2059 }
2060
2061 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2062         [EXEC_INPUT_NULL] = "null",
2063         [EXEC_INPUT_TTY] = "tty",
2064         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2065         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2066         [EXEC_INPUT_SOCKET] = "socket"
2067 };
2068
2069 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2070
2071 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2072         [EXEC_OUTPUT_INHERIT] = "inherit",
2073         [EXEC_OUTPUT_NULL] = "null",
2074         [EXEC_OUTPUT_TTY] = "tty",
2075         [EXEC_OUTPUT_SYSLOG] = "syslog",
2076         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2077         [EXEC_OUTPUT_KMSG] = "kmsg",
2078         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2079         [EXEC_OUTPUT_JOURNAL] = "journal",
2080         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2081         [EXEC_OUTPUT_SOCKET] = "socket"
2082 };
2083
2084 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);