chiark / gitweb /
execute: support syscall filtering using seccomp filters
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42
43 #ifdef HAVE_PAM
44 #include <security/pam_appl.h>
45 #endif
46
47 #include "execute.h"
48 #include "strv.h"
49 #include "macro.h"
50 #include "capability.h"
51 #include "util.h"
52 #include "log.h"
53 #include "ioprio.h"
54 #include "securebits.h"
55 #include "cgroup.h"
56 #include "namespace.h"
57 #include "tcpwrap.h"
58 #include "exit-status.h"
59 #include "missing.h"
60 #include "utmp-wtmp.h"
61 #include "def.h"
62 #include "loopback-setup.h"
63 #include "path-util.h"
64 #include "syscall-list.h"
65
66 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
67
68 /* This assumes there is a 'tty' group */
69 #define TTY_MODE 0620
70
/* Rearranges the passed descriptors so that entry k of the array ends
 * up being file descriptor k+3. The array is modified in place: every
 * entry that is moved is replaced by its duplicate and the old
 * descriptor is closed.
 *
 * Returns 0 on success, or a negative errno value if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Each pass starts at the first slot that could not be placed
         * in the previous pass; we are done once a pass places all. */
        while (first >= 0) {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int got;

                        /* Nothing to do if the slot already holds the right fd */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD hands out the lowest free fd >= want, so
                         * a different result means the wanted number is
                         * still taken. Remember the earliest such slot. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
114
/* Applies the requested O_NONBLOCK state to every passed descriptor and
 * clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock() or fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child we execute. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
141
142 static const char *tty_path(const ExecContext *context) {
143         assert(context);
144
145         if (context->tty_path)
146                 return context->tty_path;
147
148         return "/dev/console";
149 }
150
151 void exec_context_tty_reset(const ExecContext *context) {
152         assert(context);
153
154         if (context->tty_vhangup)
155                 terminal_vhangup(tty_path(context));
156
157         if (context->tty_reset)
158                 reset_terminal(tty_path(context));
159
160         if (context->tty_vt_disallocate && context->tty_path)
161                 vt_disallocate(context->tty_path);
162 }
163
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd.
 *
 * Returns nfd on success, or a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already gave us the wanted number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return result;
}
180
181 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
182         int fd, r;
183         union sockaddr_union sa;
184
185         assert(context);
186         assert(output < _EXEC_OUTPUT_MAX);
187         assert(ident);
188         assert(nfd >= 0);
189
190         fd = socket(AF_UNIX, SOCK_STREAM, 0);
191         if (fd < 0)
192                 return -errno;
193
194         zero(sa);
195         sa.un.sun_family = AF_UNIX;
196         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
197
198         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
199         if (r < 0) {
200                 close_nointr_nofail(fd);
201                 return -errno;
202         }
203
204         if (shutdown(fd, SHUT_RD) < 0) {
205                 close_nointr_nofail(fd);
206                 return -errno;
207         }
208
209         dprintf(fd,
210                 "%s\n"
211                 "%s\n"
212                 "%i\n"
213                 "%i\n"
214                 "%i\n"
215                 "%i\n"
216                 "%i\n",
217                 context->syslog_identifier ? context->syslog_identifier : ident,
218                 unit_id,
219                 context->syslog_priority,
220                 !!context->syslog_level_prefix,
221                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
222                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
223                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
224
225         if (fd != nfd) {
226                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
227                 close_nointr_nofail(fd);
228         } else
229                 r = nfd;
230
231         return r;
232 }
/* Opens the terminal at path via open_terminal() (with O_NOCTTY added
 * to mode) and makes it available as descriptor nfd.
 *
 * Returns nfd on success, the negative value from open_terminal() if
 * opening fails, or a negative errno value if dup2() fails. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        result = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(term_fd);

        return result;
}
250
251 static bool is_terminal_input(ExecInput i) {
252         return
253                 i == EXEC_INPUT_TTY ||
254                 i == EXEC_INPUT_TTY_FORCE ||
255                 i == EXEC_INPUT_TTY_FAIL;
256 }
257
258 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
259
260         if (is_terminal_input(std_input) && !apply_tty_stdin)
261                 return EXEC_INPUT_NULL;
262
263         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
264                 return EXEC_INPUT_NULL;
265
266         return std_input;
267 }
268
269 static int fixup_output(ExecOutput std_output, int socket_fd) {
270
271         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
272                 return EXEC_OUTPUT_INHERIT;
273
274         return std_output;
275 }
276
277 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
278         ExecInput i;
279
280         assert(context);
281
282         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
283
284         switch (i) {
285
286         case EXEC_INPUT_NULL:
287                 return open_null_as(O_RDONLY, STDIN_FILENO);
288
289         case EXEC_INPUT_TTY:
290         case EXEC_INPUT_TTY_FORCE:
291         case EXEC_INPUT_TTY_FAIL: {
292                 int fd, r;
293
294                 if ((fd = acquire_terminal(
295                                      tty_path(context),
296                                      i == EXEC_INPUT_TTY_FAIL,
297                                      i == EXEC_INPUT_TTY_FORCE,
298                                      false,
299                                      (usec_t) -1)) < 0)
300                         return fd;
301
302                 if (fd != STDIN_FILENO) {
303                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
304                         close_nointr_nofail(fd);
305                 } else
306                         r = STDIN_FILENO;
307
308                 return r;
309         }
310
311         case EXEC_INPUT_SOCKET:
312                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
313
314         default:
315                 assert_not_reached("Unknown input type");
316         }
317 }
318
319 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
320         ExecOutput o;
321         ExecInput i;
322
323         assert(context);
324         assert(ident);
325
326         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
327         o = fixup_output(context->std_output, socket_fd);
328
329         /* This expects the input is already set up */
330
331         switch (o) {
332
333         case EXEC_OUTPUT_INHERIT:
334
335                 /* If input got downgraded, inherit the original value */
336                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
337                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
338
339                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
340                 if (i != EXEC_INPUT_NULL)
341                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
342
343                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
344                 if (getppid() != 1)
345                         return STDOUT_FILENO;
346
347                 /* We need to open /dev/null here anew, to get the
348                  * right access mode. So we fall through */
349
350         case EXEC_OUTPUT_NULL:
351                 return open_null_as(O_WRONLY, STDOUT_FILENO);
352
353         case EXEC_OUTPUT_TTY:
354                 if (is_terminal_input(i))
355                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
356
357                 /* We don't reset the terminal if this is just about output */
358                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
359
360         case EXEC_OUTPUT_SYSLOG:
361         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
362         case EXEC_OUTPUT_KMSG:
363         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
364         case EXEC_OUTPUT_JOURNAL:
365         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
366                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
367
368         case EXEC_OUTPUT_SOCKET:
369                 assert(socket_fd >= 0);
370                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
371
372         default:
373                 assert_not_reached("Unknown output type");
374         }
375 }
376
377 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
378         ExecOutput o, e;
379         ExecInput i;
380
381         assert(context);
382         assert(ident);
383
384         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
385         o = fixup_output(context->std_output, socket_fd);
386         e = fixup_output(context->std_error, socket_fd);
387
388         /* This expects the input and output are already set up */
389
390         /* Don't change the stderr file descriptor if we inherit all
391          * the way and are not on a tty */
392         if (e == EXEC_OUTPUT_INHERIT &&
393             o == EXEC_OUTPUT_INHERIT &&
394             i == EXEC_INPUT_NULL &&
395             !is_terminal_input(context->std_input) &&
396             getppid () != 1)
397                 return STDERR_FILENO;
398
399         /* Duplicate from stdout if possible */
400         if (e == o || e == EXEC_OUTPUT_INHERIT)
401                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
402
403         switch (e) {
404
405         case EXEC_OUTPUT_NULL:
406                 return open_null_as(O_WRONLY, STDERR_FILENO);
407
408         case EXEC_OUTPUT_TTY:
409                 if (is_terminal_input(i))
410                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
411
412                 /* We don't reset the terminal if this is just about output */
413                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
414
415         case EXEC_OUTPUT_SYSLOG:
416         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
417         case EXEC_OUTPUT_KMSG:
418         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
419         case EXEC_OUTPUT_JOURNAL:
420         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
421                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
422
423         case EXEC_OUTPUT_SOCKET:
424                 assert(socket_fd >= 0);
425                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
426
427         default:
428                 assert_not_reached("Unknown error type");
429         }
430 }
431
432 static int chown_terminal(int fd, uid_t uid) {
433         struct stat st;
434
435         assert(fd >= 0);
436
437         /* This might fail. What matters are the results. */
438         (void) fchown(fd, uid, -1);
439         (void) fchmod(fd, TTY_MODE);
440
441         if (fstat(fd, &st) < 0)
442                 return -errno;
443
444         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
445                 return -EPERM;
446
447         return 0;
448 }
449
450 static int setup_confirm_stdio(int *_saved_stdin,
451                                int *_saved_stdout) {
452         int fd = -1, saved_stdin, saved_stdout = -1, r;
453
454         assert(_saved_stdin);
455         assert(_saved_stdout);
456
457         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
458         if (saved_stdin < 0)
459                 return -errno;
460
461         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
462         if (saved_stdout < 0) {
463                 r = errno;
464                 goto fail;
465         }
466
467         fd = acquire_terminal(
468                         "/dev/console",
469                         false,
470                         false,
471                         false,
472                         DEFAULT_CONFIRM_USEC);
473         if (fd < 0) {
474                 r = fd;
475                 goto fail;
476         }
477
478         r = chown_terminal(fd, getuid());
479         if (r < 0)
480                 goto fail;
481
482         if (dup2(fd, STDIN_FILENO) < 0) {
483                 r = -errno;
484                 goto fail;
485         }
486
487         if (dup2(fd, STDOUT_FILENO) < 0) {
488                 r = -errno;
489                 goto fail;
490         }
491
492         if (fd >= 2)
493                 close_nointr_nofail(fd);
494
495         *_saved_stdin = saved_stdin;
496         *_saved_stdout = saved_stdout;
497
498         return 0;
499
500 fail:
501         if (saved_stdout >= 0)
502                 close_nointr_nofail(saved_stdout);
503
504         if (saved_stdin >= 0)
505                 close_nointr_nofail(saved_stdin);
506
507         if (fd >= 0)
508                 close_nointr_nofail(fd);
509
510         return r;
511 }
512
513 static int write_confirm_message(const char *format, ...) {
514         int fd;
515         va_list ap;
516
517         assert(format);
518
519         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
520         if (fd < 0)
521                 return fd;
522
523         va_start(ap, format);
524         vdprintf(fd, format, ap);
525         va_end(ap);
526
527         close_nointr_nofail(fd);
528
529         return 0;
530 }
531
/* Releases the terminal and puts the saved descriptors back in place as
 * stdin and stdout, then closes the saved copies. Entries that are
 * negative are skipped.
 *
 * Returns 0 on success, or the negative errno value of the last dup2()
 * that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* The copies are no longer needed, whether or not the
         * restoring above worked out. */
        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
558
/* Asks on the console whether the command line given by argv shall be
 * executed. stdin and stdout are redirected to the console for the
 * duration of the question and restored before returning, on both the
 * success and the failure path.
 *
 * The reply is stored in *response by ask(), which is passed "yns" as
 * its set of replies.
 *
 * Returns the result of ask(), the negative value of
 * setup_confirm_stdio() if the console cannot be set up, or -ENOMEM if
 * the command line cannot be formatted. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdio has already been redirected at this point, so
                 * undo that before bailing out. */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
578
579 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
580         bool keep_groups = false;
581         int r;
582
583         assert(context);
584
585         /* Lookup and set GID and supplementary group list. Here too
586          * we avoid NSS lookups for gid=0. */
587
588         if (context->group || username) {
589
590                 if (context->group) {
591                         const char *g = context->group;
592
593                         if ((r = get_group_creds(&g, &gid)) < 0)
594                                 return r;
595                 }
596
597                 /* First step, initialize groups from /etc/groups */
598                 if (username && gid != 0) {
599                         if (initgroups(username, gid) < 0)
600                                 return -errno;
601
602                         keep_groups = true;
603                 }
604
605                 /* Second step, set our gids */
606                 if (setresgid(gid, gid, gid) < 0)
607                         return -errno;
608         }
609
610         if (context->supplementary_groups) {
611                 int ngroups_max, k;
612                 gid_t *gids;
613                 char **i;
614
615                 /* Final step, initialize any manually set supplementary groups */
616                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
617
618                 if (!(gids = new(gid_t, ngroups_max)))
619                         return -ENOMEM;
620
621                 if (keep_groups) {
622                         if ((k = getgroups(ngroups_max, gids)) < 0) {
623                                 free(gids);
624                                 return -errno;
625                         }
626                 } else
627                         k = 0;
628
629                 STRV_FOREACH(i, context->supplementary_groups) {
630                         const char *g;
631
632                         if (k >= ngroups_max) {
633                                 free(gids);
634                                 return -E2BIG;
635                         }
636
637                         g = *i;
638                         r = get_group_creds(&g, gids+k);
639                         if (r < 0) {
640                                 free(gids);
641                                 return r;
642                         }
643
644                         k++;
645                 }
646
647                 if (setgroups(k, gids) < 0) {
648                         free(gids);
649                         return -errno;
650                 }
651
652                 free(gids);
653         }
654
655         return 0;
656 }
657
658 static int enforce_user(const ExecContext *context, uid_t uid) {
659         int r;
660         assert(context);
661
662         /* Sets (but doesn't lookup) the uid and make sure we keep the
663          * capabilities while doing so. */
664
665         if (context->capabilities) {
666                 cap_t d;
667                 static const cap_value_t bits[] = {
668                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
669                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
670                 };
671
672                 /* First step: If we need to keep capabilities but
673                  * drop privileges we need to make sure we keep our
674                  * caps, whiel we drop privileges. */
675                 if (uid != 0) {
676                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
677
678                         if (prctl(PR_GET_SECUREBITS) != sb)
679                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
680                                         return -errno;
681                 }
682
683                 /* Second step: set the capabilities. This will reduce
684                  * the capabilities to the minimum we need. */
685
686                 if (!(d = cap_dup(context->capabilities)))
687                         return -errno;
688
689                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
690                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
691                         r = -errno;
692                         cap_free(d);
693                         return r;
694                 }
695
696                 if (cap_set_proc(d) < 0) {
697                         r = -errno;
698                         cap_free(d);
699                         return r;
700                 }
701
702                 cap_free(d);
703         }
704
705         /* Third step: actually set the uids */
706         if (setresuid(uid, uid, uid) < 0)
707                 return -errno;
708
709         /* At this point we should have all necessary capabilities but
710            are otherwise a normal user. However, the caps might got
711            corrupted due to the setresuid() so we need clean them up
712            later. This is done outside of this call. */
713
714         return 0;
715 }
716
717 #ifdef HAVE_PAM
718
719 static int null_conv(
720                 int num_msg,
721                 const struct pam_message **msg,
722                 struct pam_response **resp,
723                 void *appdata_ptr) {
724
725         /* We don't support conversations */
726
727         return PAM_CONV_ERR;
728 }
729
730 static int setup_pam(
731                 const char *name,
732                 const char *user,
733                 uid_t uid,
734                 const char *tty,
735                 char ***pam_env,
736                 int fds[], unsigned n_fds) {
737
738         static const struct pam_conv conv = {
739                 .conv = null_conv,
740                 .appdata_ptr = NULL
741         };
742
743         pam_handle_t *handle = NULL;
744         sigset_t ss, old_ss;
745         int pam_code = PAM_SUCCESS;
746         int err;
747         char **e = NULL;
748         bool close_session = false;
749         pid_t pam_pid = 0, parent_pid;
750
751         assert(name);
752         assert(user);
753         assert(pam_env);
754
755         /* We set up PAM in the parent process, then fork. The child
756          * will then stay around until killed via PR_GET_PDEATHSIG or
757          * systemd via the cgroup logic. It will then remove the PAM
758          * session again. The parent process will exec() the actual
759          * daemon. We do things this way to ensure that the main PID
760          * of the daemon is the one we initially fork()ed. */
761
762         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
763                 handle = NULL;
764                 goto fail;
765         }
766
767         if (tty)
768                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
769                         goto fail;
770
771         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
772                 goto fail;
773
774         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
775                 goto fail;
776
777         close_session = true;
778
779         if ((!(e = pam_getenvlist(handle)))) {
780                 pam_code = PAM_BUF_ERR;
781                 goto fail;
782         }
783
784         /* Block SIGTERM, so that we know that it won't get lost in
785          * the child */
786         if (sigemptyset(&ss) < 0 ||
787             sigaddset(&ss, SIGTERM) < 0 ||
788             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
789                 goto fail;
790
791         parent_pid = getpid();
792
793         if ((pam_pid = fork()) < 0)
794                 goto fail;
795
796         if (pam_pid == 0) {
797                 int sig;
798                 int r = EXIT_PAM;
799
800                 /* The child's job is to reset the PAM session on
801                  * termination */
802
803                 /* This string must fit in 10 chars (i.e. the length
804                  * of "/sbin/init"), to look pretty in /bin/ps */
805                 rename_process("(sd-pam)");
806
807                 /* Make sure we don't keep open the passed fds in this
808                 child. We assume that otherwise only those fds are
809                 open here that have been opened by PAM. */
810                 close_many(fds, n_fds);
811
812                 /* Drop privileges - we don't need any to pam_close_session
813                  * and this will make PR_SET_PDEATHSIG work in most cases.
814                  * If this fails, ignore the error - but expect sd-pam threads
815                  * to fail to exit normally */
816                 if (setresuid(uid, uid, uid) < 0)
817                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
818
819                 /* Wait until our parent died. This will only work if
820                  * the above setresuid() succeeds, otherwise the kernel
821                  * will not allow unprivileged parents kill their privileged
822                  * children this way. We rely on the control groups kill logic
823                  * to do the rest for us. */
824                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
825                         goto child_finish;
826
827                 /* Check if our parent process might already have
828                  * died? */
829                 if (getppid() == parent_pid) {
830                         for (;;) {
831                                 if (sigwait(&ss, &sig) < 0) {
832                                         if (errno == EINTR)
833                                                 continue;
834
835                                         goto child_finish;
836                                 }
837
838                                 assert(sig == SIGTERM);
839                                 break;
840                         }
841                 }
842
843                 /* If our parent died we'll end the session */
844                 if (getppid() != parent_pid)
845                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
846                                 goto child_finish;
847
848                 r = 0;
849
850         child_finish:
851                 pam_end(handle, pam_code | PAM_DATA_SILENT);
852                 _exit(r);
853         }
854
855         /* If the child was forked off successfully it will do all the
856          * cleanups, so forget about the handle here. */
857         handle = NULL;
858
859         /* Unblock SIGTERM again in the parent */
860         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
861                 goto fail;
862
863         /* We close the log explicitly here, since the PAM modules
864          * might have opened it, but we don't want this fd around. */
865         closelog();
866
867         *pam_env = e;
868         e = NULL;
869
870         return 0;
871
872 fail:
873         if (pam_code != PAM_SUCCESS)
874                 err = -EPERM;  /* PAM errors do not map to errno */
875         else
876                 err = -errno;
877
878         if (handle) {
879                 if (close_session)
880                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
881
882                 pam_end(handle, pam_code | PAM_DATA_SILENT);
883         }
884
885         strv_free(e);
886
887         closelog();
888
889         if (pam_pid > 1) {
890                 kill(pam_pid, SIGTERM);
891                 kill(pam_pid, SIGCONT);
892         }
893
894         return err;
895 }
896 #endif
897
/* Renames the calling process after the file name component of path,
 * wrapped in parentheses. Only the last 8 characters of the file name
 * are used, so that the result is at most 10 characters long. If the
 * file name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base, *tail;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        tail = base;

        /* Prefer the end of the name: the beginning is frequently
         * just a common prefix such as "systemd-". */
        if (len > 8) {
                tail = base + (len - 8);
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, tail, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = '\0';

        rename_process(process_name);
}
928
929 static int apply_seccomp(uint32_t *syscall_filter) {
930         static const struct sock_filter header[] = {
931                 VALIDATE_ARCHITECTURE,
932                 EXAMINE_SYSCALL
933         };
934         static const struct sock_filter footer[] = {
935                 _KILL_PROCESS
936         };
937
938         int i;
939         unsigned n;
940         struct sock_filter *f;
941         struct sock_fprog prog;
942
943         assert(syscall_filter);
944
945         /* First: count the syscalls to check for */
946         for (i = 0, n = 0; i < syscall_max(); i++)
947                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
948                         n++;
949
950         /* Second: build the filter program from a header the syscall
951          * matches and the footer */
952         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
953         memcpy(f, header, sizeof(header));
954
955         for (i = 0, n = 0; i < syscall_max(); i++)
956                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
957                         struct sock_filter item[] = {
958                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
959                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
960                         };
961
962                         assert_cc(ELEMENTSOF(item) == 2);
963
964                         f[ELEMENTSOF(header) + 2*n]  = item[0];
965                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
966
967                         n++;
968                 }
969
970         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
971
972         /* Third: install the filter */
973         zero(prog);
974         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
975         prog.filter = f;
976         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
977                 return -errno;
978
979         return 0;
980 }
981
982 int exec_spawn(ExecCommand *command,
983                char **argv,
984                const ExecContext *context,
985                int fds[], unsigned n_fds,
986                char **environment,
987                bool apply_permissions,
988                bool apply_chroot,
989                bool apply_tty_stdin,
990                bool confirm_spawn,
991                CGroupBonding *cgroup_bondings,
992                CGroupAttribute *cgroup_attributes,
993                const char *cgroup_suffix,
994                const char *unit_id,
995                int idle_pipe[2],
996                pid_t *ret) {
997
998         pid_t pid;
999         int r;
1000         char *line;
1001         int socket_fd;
1002         char **files_env = NULL;
1003
1004         assert(command);
1005         assert(context);
1006         assert(ret);
1007         assert(fds || n_fds <= 0);
1008
1009         if (context->std_input == EXEC_INPUT_SOCKET ||
1010             context->std_output == EXEC_OUTPUT_SOCKET ||
1011             context->std_error == EXEC_OUTPUT_SOCKET) {
1012
1013                 if (n_fds != 1)
1014                         return -EINVAL;
1015
1016                 socket_fd = fds[0];
1017
1018                 fds = NULL;
1019                 n_fds = 0;
1020         } else
1021                 socket_fd = -1;
1022
1023         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1024                 log_error("Failed to load environment files: %s", strerror(-r));
1025                 return r;
1026         }
1027
1028         if (!argv)
1029                 argv = command->argv;
1030
1031         line = exec_command_line(argv);
1032         if (!line) {
1033                 r = -ENOMEM;
1034                 goto fail_parent;
1035         }
1036
1037         log_debug("About to execute: %s", line);
1038         free(line);
1039
1040         r = cgroup_bonding_realize_list(cgroup_bondings);
1041         if (r < 0)
1042                 goto fail_parent;
1043
1044         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1045
1046         if ((pid = fork()) < 0) {
1047                 r = -errno;
1048                 goto fail_parent;
1049         }
1050
1051         if (pid == 0) {
1052                 int i, err;
1053                 sigset_t ss;
1054                 const char *username = NULL, *home = NULL;
1055                 uid_t uid = (uid_t) -1;
1056                 gid_t gid = (gid_t) -1;
1057                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1058                 unsigned n_env = 0;
1059                 bool set_access = false;
1060
1061                 /* child */
1062
1063                 rename_process_from_path(command->path);
1064
1065                 /* We reset exactly these signals, since they are the
1066                  * only ones we set to SIG_IGN in the main daemon. All
1067                  * others we leave untouched because we set them to
1068                  * SIG_DFL or a valid handler initially, both of which
1069                  * will be demoted to SIG_DFL. */
1070                 default_signals(SIGNALS_CRASH_HANDLER,
1071                                 SIGNALS_IGNORE, -1);
1072
1073                 if (context->ignore_sigpipe)
1074                         ignore_signals(SIGPIPE, -1);
1075
1076                 assert_se(sigemptyset(&ss) == 0);
1077                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1078                         err = -errno;
1079                         r = EXIT_SIGNAL_MASK;
1080                         goto fail_child;
1081                 }
1082
1083                 if (idle_pipe) {
1084                         if (idle_pipe[1] >= 0)
1085                                 close_nointr_nofail(idle_pipe[1]);
1086                         if (idle_pipe[0] >= 0) {
1087                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1088                                 close_nointr_nofail(idle_pipe[0]);
1089                         }
1090                 }
1091
1092                 /* Close sockets very early to make sure we don't
1093                  * block init reexecution because it cannot bind its
1094                  * sockets */
1095                 log_forget_fds();
1096                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1097                                            socket_fd >= 0 ? 1 : n_fds);
1098                 if (err < 0) {
1099                         r = EXIT_FDS;
1100                         goto fail_child;
1101                 }
1102
1103                 if (!context->same_pgrp)
1104                         if (setsid() < 0) {
1105                                 err = -errno;
1106                                 r = EXIT_SETSID;
1107                                 goto fail_child;
1108                         }
1109
1110                 if (context->tcpwrap_name) {
1111                         if (socket_fd >= 0)
1112                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1113                                         err = -EACCES;
1114                                         r = EXIT_TCPWRAP;
1115                                         goto fail_child;
1116                                 }
1117
1118                         for (i = 0; i < (int) n_fds; i++) {
1119                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1120                                         err = -EACCES;
1121                                         r = EXIT_TCPWRAP;
1122                                         goto fail_child;
1123                                 }
1124                         }
1125                 }
1126
1127                 exec_context_tty_reset(context);
1128
1129                 if (confirm_spawn) {
1130                         char response;
1131
1132                         err = ask_for_confirmation(&response, argv);
1133                         if (err == -ETIMEDOUT)
1134                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1135                         else if (err < 0)
1136                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1137                         else if (response == 's') {
1138                                 write_confirm_message("Skipping execution.\n");
1139                                 err = -ECANCELED;
1140                                 r = EXIT_CONFIRM;
1141                                 goto fail_child;
1142                         } else if (response == 'n') {
1143                                 write_confirm_message("Failing execution.\n");
1144                                 err = r = 0;
1145                                 goto fail_child;
1146                         }
1147                 }
1148
1149                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1150                  * must sure to drop O_NONBLOCK */
1151                 if (socket_fd >= 0)
1152                         fd_nonblock(socket_fd, false);
1153
1154                 err = setup_input(context, socket_fd, apply_tty_stdin);
1155                 if (err < 0) {
1156                         r = EXIT_STDIN;
1157                         goto fail_child;
1158                 }
1159
1160                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1161                 if (err < 0) {
1162                         r = EXIT_STDOUT;
1163                         goto fail_child;
1164                 }
1165
1166                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1167                 if (err < 0) {
1168                         r = EXIT_STDERR;
1169                         goto fail_child;
1170                 }
1171
1172                 if (cgroup_bondings) {
1173                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1174                         if (err < 0) {
1175                                 r = EXIT_CGROUP;
1176                                 goto fail_child;
1177                         }
1178                 }
1179
1180                 if (context->oom_score_adjust_set) {
1181                         char t[16];
1182
1183                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1184                         char_array_0(t);
1185
1186                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1187                                 err = -errno;
1188                                 r = EXIT_OOM_ADJUST;
1189                                 goto fail_child;
1190                         }
1191                 }
1192
1193                 if (context->nice_set)
1194                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1195                                 err = -errno;
1196                                 r = EXIT_NICE;
1197                                 goto fail_child;
1198                         }
1199
1200                 if (context->cpu_sched_set) {
1201                         struct sched_param param;
1202
1203                         zero(param);
1204                         param.sched_priority = context->cpu_sched_priority;
1205
1206                         if (sched_setscheduler(0, context->cpu_sched_policy |
1207                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1208                                 err = -errno;
1209                                 r = EXIT_SETSCHEDULER;
1210                                 goto fail_child;
1211                         }
1212                 }
1213
1214                 if (context->cpuset)
1215                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1216                                 err = -errno;
1217                                 r = EXIT_CPUAFFINITY;
1218                                 goto fail_child;
1219                         }
1220
1221                 if (context->ioprio_set)
1222                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1223                                 err = -errno;
1224                                 r = EXIT_IOPRIO;
1225                                 goto fail_child;
1226                         }
1227
1228                 if (context->timer_slack_nsec != (nsec_t) -1)
1229                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1230                                 err = -errno;
1231                                 r = EXIT_TIMERSLACK;
1232                                 goto fail_child;
1233                         }
1234
1235                 if (context->utmp_id)
1236                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1237
1238                 if (context->user) {
1239                         username = context->user;
1240                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1241                         if (err < 0) {
1242                                 r = EXIT_USER;
1243                                 goto fail_child;
1244                         }
1245
1246                         if (is_terminal_input(context->std_input)) {
1247                                 err = chown_terminal(STDIN_FILENO, uid);
1248                                 if (err < 0) {
1249                                         r = EXIT_STDIN;
1250                                         goto fail_child;
1251                                 }
1252                         }
1253
1254                         if (cgroup_bondings && context->control_group_modify) {
1255                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1256                                 if (err >= 0)
1257                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1258                                 if (err < 0) {
1259                                         r = EXIT_CGROUP;
1260                                         goto fail_child;
1261                                 }
1262
1263                                 set_access = true;
1264                         }
1265                 }
1266
1267                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1268                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1269                         if (err < 0) {
1270                                 r = EXIT_CGROUP;
1271                                 goto fail_child;
1272                         }
1273                 }
1274
1275                 if (apply_permissions) {
1276                         err = enforce_groups(context, username, gid);
1277                         if (err < 0) {
1278                                 r = EXIT_GROUP;
1279                                 goto fail_child;
1280                         }
1281                 }
1282
1283                 umask(context->umask);
1284
1285 #ifdef HAVE_PAM
1286                 if (context->pam_name && username) {
1287                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1288                         if (err < 0) {
1289                                 r = EXIT_PAM;
1290                                 goto fail_child;
1291                         }
1292                 }
1293 #endif
1294                 if (context->private_network) {
1295                         if (unshare(CLONE_NEWNET) < 0) {
1296                                 err = -errno;
1297                                 r = EXIT_NETWORK;
1298                                 goto fail_child;
1299                         }
1300
1301                         loopback_setup();
1302                 }
1303
1304                 if (strv_length(context->read_write_dirs) > 0 ||
1305                     strv_length(context->read_only_dirs) > 0 ||
1306                     strv_length(context->inaccessible_dirs) > 0 ||
1307                     context->mount_flags != MS_SHARED ||
1308                     context->private_tmp) {
1309                         err = setup_namespace(context->read_write_dirs,
1310                                               context->read_only_dirs,
1311                                               context->inaccessible_dirs,
1312                                               context->private_tmp,
1313                                               context->mount_flags);
1314                         if (err < 0) {
1315                                 r = EXIT_NAMESPACE;
1316                                 goto fail_child;
1317                         }
1318                 }
1319
1320                 if (apply_chroot) {
1321                         if (context->root_directory)
1322                                 if (chroot(context->root_directory) < 0) {
1323                                         err = -errno;
1324                                         r = EXIT_CHROOT;
1325                                         goto fail_child;
1326                                 }
1327
1328                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1329                                 err = -errno;
1330                                 r = EXIT_CHDIR;
1331                                 goto fail_child;
1332                         }
1333                 } else {
1334
1335                         char *d;
1336
1337                         if (asprintf(&d, "%s/%s",
1338                                      context->root_directory ? context->root_directory : "",
1339                                      context->working_directory ? context->working_directory : "") < 0) {
1340                                 err = -ENOMEM;
1341                                 r = EXIT_MEMORY;
1342                                 goto fail_child;
1343                         }
1344
1345                         if (chdir(d) < 0) {
1346                                 err = -errno;
1347                                 free(d);
1348                                 r = EXIT_CHDIR;
1349                                 goto fail_child;
1350                         }
1351
1352                         free(d);
1353                 }
1354
1355                 /* We repeat the fd closing here, to make sure that
1356                  * nothing is leaked from the PAM modules */
1357                 err = close_all_fds(fds, n_fds);
1358                 if (err >= 0)
1359                         err = shift_fds(fds, n_fds);
1360                 if (err >= 0)
1361                         err = flags_fds(fds, n_fds, context->non_blocking);
1362                 if (err < 0) {
1363                         r = EXIT_FDS;
1364                         goto fail_child;
1365                 }
1366
1367                 if (apply_permissions) {
1368
1369                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1370                                 if (!context->rlimit[i])
1371                                         continue;
1372
1373                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1374                                         err = -errno;
1375                                         r = EXIT_LIMITS;
1376                                         goto fail_child;
1377                                 }
1378                         }
1379
1380                         if (context->capability_bounding_set_drop) {
1381                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1382                                 if (err < 0) {
1383                                         r = EXIT_CAPABILITIES;
1384                                         goto fail_child;
1385                                 }
1386                         }
1387
1388                         if (context->user) {
1389                                 err = enforce_user(context, uid);
1390                                 if (err < 0) {
1391                                         r = EXIT_USER;
1392                                         goto fail_child;
1393                                 }
1394                         }
1395
1396                         /* PR_GET_SECUREBITS is not privileged, while
1397                          * PR_SET_SECUREBITS is. So to suppress
1398                          * potential EPERMs we'll try not to call
1399                          * PR_SET_SECUREBITS unless necessary. */
1400                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1401                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1402                                         err = -errno;
1403                                         r = EXIT_SECUREBITS;
1404                                         goto fail_child;
1405                                 }
1406
1407                         if (context->capabilities)
1408                                 if (cap_set_proc(context->capabilities) < 0) {
1409                                         err = -errno;
1410                                         r = EXIT_CAPABILITIES;
1411                                         goto fail_child;
1412                                 }
1413
1414                         if (context->no_new_privileges)
1415                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1416                                         err = -errno;
1417                                         r = EXIT_NO_NEW_PRIVILEGES;
1418                                         goto fail_child;
1419                                 }
1420
1421                         if (context->syscall_filter) {
1422                                 err = apply_seccomp(context->syscall_filter);
1423                                 if (err < 0) {
1424                                         r = EXIT_SECCOMP;
1425                                         goto fail_child;
1426                                 }
1427                         }
1428                 }
1429
1430                 if (!(our_env = new0(char*, 7))) {
1431                         err = -ENOMEM;
1432                         r = EXIT_MEMORY;
1433                         goto fail_child;
1434                 }
1435
1436                 if (n_fds > 0)
1437                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1438                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1439                                 err = -ENOMEM;
1440                                 r = EXIT_MEMORY;
1441                                 goto fail_child;
1442                         }
1443
1444                 if (home)
1445                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1446                                 err = -ENOMEM;
1447                                 r = EXIT_MEMORY;
1448                                 goto fail_child;
1449                         }
1450
1451                 if (username)
1452                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1453                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1454                                 err = -ENOMEM;
1455                                 r = EXIT_MEMORY;
1456                                 goto fail_child;
1457                         }
1458
1459                 if (is_terminal_input(context->std_input) ||
1460                     context->std_output == EXEC_OUTPUT_TTY ||
1461                     context->std_error == EXEC_OUTPUT_TTY)
1462                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1463                                 err = -ENOMEM;
1464                                 r = EXIT_MEMORY;
1465                                 goto fail_child;
1466                         }
1467
1468                 assert(n_env <= 7);
1469
1470                 if (!(final_env = strv_env_merge(
1471                                       5,
1472                                       environment,
1473                                       our_env,
1474                                       context->environment,
1475                                       files_env,
1476                                       pam_env,
1477                                       NULL))) {
1478                         err = -ENOMEM;
1479                         r = EXIT_MEMORY;
1480                         goto fail_child;
1481                 }
1482
1483                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1484                         err = -ENOMEM;
1485                         r = EXIT_MEMORY;
1486                         goto fail_child;
1487                 }
1488
1489                 final_env = strv_env_clean(final_env);
1490
1491                 execve(command->path, final_argv, final_env);
1492                 err = -errno;
1493                 r = EXIT_EXEC;
1494
1495         fail_child:
1496                 if (r != 0) {
1497                         log_open();
1498                         log_warning("Failed at step %s spawning %s: %s",
1499                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1500                                     command->path, strerror(-err));
1501                 }
1502
1503                 strv_free(our_env);
1504                 strv_free(final_env);
1505                 strv_free(pam_env);
1506                 strv_free(files_env);
1507                 strv_free(final_argv);
1508
1509                 _exit(r);
1510         }
1511
1512         strv_free(files_env);
1513
1514         /* We add the new process to the cgroup both in the child (so
1515          * that we can be sure that no user code is ever executed
1516          * outside of the cgroup) and in the parent (so that we can be
1517          * sure that when we kill the cgroup the process will be
1518          * killed too). */
1519         if (cgroup_bondings)
1520                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1521
1522         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1523
1524         exec_status_start(&command->exec_status, pid);
1525
1526         *ret = pid;
1527         return 0;
1528
1529 fail_parent:
1530         strv_free(files_env);
1531
1532         return r;
1533 }
1534
1535 void exec_context_init(ExecContext *c) {
1536         assert(c);
1537
1538         c->umask = 0022;
1539         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540         c->cpu_sched_policy = SCHED_OTHER;
1541         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542         c->syslog_level_prefix = true;
1543         c->mount_flags = MS_SHARED;
1544         c->kill_signal = SIGTERM;
1545         c->send_sigkill = true;
1546         c->control_group_persistent = -1;
1547         c->ignore_sigpipe = true;
1548         c->timer_slack_nsec = (nsec_t) -1;
1549 }
1550
1551 void exec_context_done(ExecContext *c) {
1552         unsigned l;
1553
1554         assert(c);
1555
1556         strv_free(c->environment);
1557         c->environment = NULL;
1558
1559         strv_free(c->environment_files);
1560         c->environment_files = NULL;
1561
1562         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1563                 free(c->rlimit[l]);
1564                 c->rlimit[l] = NULL;
1565         }
1566
1567         free(c->working_directory);
1568         c->working_directory = NULL;
1569         free(c->root_directory);
1570         c->root_directory = NULL;
1571
1572         free(c->tty_path);
1573         c->tty_path = NULL;
1574
1575         free(c->tcpwrap_name);
1576         c->tcpwrap_name = NULL;
1577
1578         free(c->syslog_identifier);
1579         c->syslog_identifier = NULL;
1580
1581         free(c->user);
1582         c->user = NULL;
1583
1584         free(c->group);
1585         c->group = NULL;
1586
1587         strv_free(c->supplementary_groups);
1588         c->supplementary_groups = NULL;
1589
1590         free(c->pam_name);
1591         c->pam_name = NULL;
1592
1593         if (c->capabilities) {
1594                 cap_free(c->capabilities);
1595                 c->capabilities = NULL;
1596         }
1597
1598         strv_free(c->read_only_dirs);
1599         c->read_only_dirs = NULL;
1600
1601         strv_free(c->read_write_dirs);
1602         c->read_write_dirs = NULL;
1603
1604         strv_free(c->inaccessible_dirs);
1605         c->inaccessible_dirs = NULL;
1606
1607         if (c->cpuset)
1608                 CPU_FREE(c->cpuset);
1609
1610         free(c->utmp_id);
1611         c->utmp_id = NULL;
1612 }
1613
1614 void exec_command_done(ExecCommand *c) {
1615         assert(c);
1616
1617         free(c->path);
1618         c->path = NULL;
1619
1620         strv_free(c->argv);
1621         c->argv = NULL;
1622 }
1623
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1625         unsigned i;
1626
1627         for (i = 0; i < n; i++)
1628                 exec_command_done(c+i);
1629 }
1630
1631 void exec_command_free_list(ExecCommand *c) {
1632         ExecCommand *i;
1633
1634         while ((i = c)) {
1635                 LIST_REMOVE(ExecCommand, command, c, i);
1636                 exec_command_done(i);
1637                 free(i);
1638         }
1639 }
1640
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1642         unsigned i;
1643
1644         for (i = 0; i < n; i++) {
1645                 exec_command_free_list(c[i]);
1646                 c[i] = NULL;
1647         }
1648 }
1649
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651         char **i, **r = NULL;
1652
1653         assert(c);
1654         assert(l);
1655
1656         STRV_FOREACH(i, c->environment_files) {
1657                 char *fn;
1658                 int k;
1659                 bool ignore = false;
1660                 char **p;
1661
1662                 fn = *i;
1663
1664                 if (fn[0] == '-') {
1665                         ignore = true;
1666                         fn ++;
1667                 }
1668
1669                 if (!path_is_absolute(fn)) {
1670
1671                         if (ignore)
1672                                 continue;
1673
1674                         strv_free(r);
1675                         return -EINVAL;
1676                 }
1677
1678                 if ((k = load_env_file(fn, &p)) < 0) {
1679
1680                         if (ignore)
1681                                 continue;
1682
1683                         strv_free(r);
1684                         return k;
1685                 }
1686
1687                 if (r == NULL)
1688                         r = p;
1689                 else {
1690                         char **m;
1691
1692                         m = strv_env_merge(2, r, p);
1693                         strv_free(r);
1694                         strv_free(p);
1695
1696                         if (!m)
1697                                 return -ENOMEM;
1698
1699                         r = m;
1700                 }
1701         }
1702
1703         *l = r;
1704
1705         return 0;
1706 }
1707
1708 static void strv_fprintf(FILE *f, char **l) {
1709         char **g;
1710
1711         assert(f);
1712
1713         STRV_FOREACH(g, l)
1714                 fprintf(f, " %s", *g);
1715 }
1716
1717 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1718         char ** e;
1719         unsigned i;
1720
1721         assert(c);
1722         assert(f);
1723
1724         if (!prefix)
1725                 prefix = "";
1726
1727         fprintf(f,
1728                 "%sUMask: %04o\n"
1729                 "%sWorkingDirectory: %s\n"
1730                 "%sRootDirectory: %s\n"
1731                 "%sNonBlocking: %s\n"
1732                 "%sPrivateTmp: %s\n"
1733                 "%sControlGroupModify: %s\n"
1734                 "%sControlGroupPersistent: %s\n"
1735                 "%sPrivateNetwork: %s\n",
1736                 prefix, c->umask,
1737                 prefix, c->working_directory ? c->working_directory : "/",
1738                 prefix, c->root_directory ? c->root_directory : "/",
1739                 prefix, yes_no(c->non_blocking),
1740                 prefix, yes_no(c->private_tmp),
1741                 prefix, yes_no(c->control_group_modify),
1742                 prefix, yes_no(c->control_group_persistent),
1743                 prefix, yes_no(c->private_network));
1744
1745         STRV_FOREACH(e, c->environment)
1746                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1747
1748         STRV_FOREACH(e, c->environment_files)
1749                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1750
1751         if (c->tcpwrap_name)
1752                 fprintf(f,
1753                         "%sTCPWrapName: %s\n",
1754                         prefix, c->tcpwrap_name);
1755
1756         if (c->nice_set)
1757                 fprintf(f,
1758                         "%sNice: %i\n",
1759                         prefix, c->nice);
1760
1761         if (c->oom_score_adjust_set)
1762                 fprintf(f,
1763                         "%sOOMScoreAdjust: %i\n",
1764                         prefix, c->oom_score_adjust);
1765
1766         for (i = 0; i < RLIM_NLIMITS; i++)
1767                 if (c->rlimit[i])
1768                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1769
1770         if (c->ioprio_set)
1771                 fprintf(f,
1772                         "%sIOSchedulingClass: %s\n"
1773                         "%sIOPriority: %i\n",
1774                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1775                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1776
1777         if (c->cpu_sched_set)
1778                 fprintf(f,
1779                         "%sCPUSchedulingPolicy: %s\n"
1780                         "%sCPUSchedulingPriority: %i\n"
1781                         "%sCPUSchedulingResetOnFork: %s\n",
1782                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1783                         prefix, c->cpu_sched_priority,
1784                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1785
1786         if (c->cpuset) {
1787                 fprintf(f, "%sCPUAffinity:", prefix);
1788                 for (i = 0; i < c->cpuset_ncpus; i++)
1789                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1790                                 fprintf(f, " %i", i);
1791                 fputs("\n", f);
1792         }
1793
1794         if (c->timer_slack_nsec != (nsec_t) -1)
1795                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1796
1797         fprintf(f,
1798                 "%sStandardInput: %s\n"
1799                 "%sStandardOutput: %s\n"
1800                 "%sStandardError: %s\n",
1801                 prefix, exec_input_to_string(c->std_input),
1802                 prefix, exec_output_to_string(c->std_output),
1803                 prefix, exec_output_to_string(c->std_error));
1804
1805         if (c->tty_path)
1806                 fprintf(f,
1807                         "%sTTYPath: %s\n"
1808                         "%sTTYReset: %s\n"
1809                         "%sTTYVHangup: %s\n"
1810                         "%sTTYVTDisallocate: %s\n",
1811                         prefix, c->tty_path,
1812                         prefix, yes_no(c->tty_reset),
1813                         prefix, yes_no(c->tty_vhangup),
1814                         prefix, yes_no(c->tty_vt_disallocate));
1815
1816         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1817             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1818             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1819             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1820                 fprintf(f,
1821                         "%sSyslogFacility: %s\n"
1822                         "%sSyslogLevel: %s\n",
1823                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1824                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1825
1826         if (c->capabilities) {
1827                 char *t;
1828                 if ((t = cap_to_text(c->capabilities, NULL))) {
1829                         fprintf(f, "%sCapabilities: %s\n",
1830                                 prefix, t);
1831                         cap_free(t);
1832                 }
1833         }
1834
1835         if (c->secure_bits)
1836                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1837                         prefix,
1838                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1839                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1840                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1841                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1842                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1843                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1844
1845         if (c->capability_bounding_set_drop) {
1846                 unsigned long l;
1847                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1848
1849                 for (l = 0; l <= cap_last_cap(); l++)
1850                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1851                                 char *t;
1852
1853                                 if ((t = cap_to_name(l))) {
1854                                         fprintf(f, " %s", t);
1855                                         cap_free(t);
1856                                 }
1857                         }
1858
1859                 fputs("\n", f);
1860         }
1861
1862         if (c->user)
1863                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1864         if (c->group)
1865                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1866
1867         if (strv_length(c->supplementary_groups) > 0) {
1868                 fprintf(f, "%sSupplementaryGroups:", prefix);
1869                 strv_fprintf(f, c->supplementary_groups);
1870                 fputs("\n", f);
1871         }
1872
1873         if (c->pam_name)
1874                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1875
1876         if (strv_length(c->read_write_dirs) > 0) {
1877                 fprintf(f, "%sReadWriteDirs:", prefix);
1878                 strv_fprintf(f, c->read_write_dirs);
1879                 fputs("\n", f);
1880         }
1881
1882         if (strv_length(c->read_only_dirs) > 0) {
1883                 fprintf(f, "%sReadOnlyDirs:", prefix);
1884                 strv_fprintf(f, c->read_only_dirs);
1885                 fputs("\n", f);
1886         }
1887
1888         if (strv_length(c->inaccessible_dirs) > 0) {
1889                 fprintf(f, "%sInaccessibleDirs:", prefix);
1890                 strv_fprintf(f, c->inaccessible_dirs);
1891                 fputs("\n", f);
1892         }
1893
1894         fprintf(f,
1895                 "%sKillMode: %s\n"
1896                 "%sKillSignal: SIG%s\n"
1897                 "%sSendSIGKILL: %s\n"
1898                 "%sIgnoreSIGPIPE: %s\n",
1899                 prefix, kill_mode_to_string(c->kill_mode),
1900                 prefix, signal_to_string(c->kill_signal),
1901                 prefix, yes_no(c->send_sigkill),
1902                 prefix, yes_no(c->ignore_sigpipe));
1903
1904         if (c->utmp_id)
1905                 fprintf(f,
1906                         "%sUtmpIdentifier: %s\n",
1907                         prefix, c->utmp_id);
1908 }
1909
1910 void exec_status_start(ExecStatus *s, pid_t pid) {
1911         assert(s);
1912
1913         zero(*s);
1914         s->pid = pid;
1915         dual_timestamp_get(&s->start_timestamp);
1916 }
1917
1918 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1919         assert(s);
1920
1921         if (s->pid && s->pid != pid)
1922                 zero(*s);
1923
1924         s->pid = pid;
1925         dual_timestamp_get(&s->exit_timestamp);
1926
1927         s->code = code;
1928         s->status = status;
1929
1930         if (context) {
1931                 if (context->utmp_id)
1932                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1933
1934                 exec_context_tty_reset(context);
1935         }
1936 }
1937
1938 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1939         char buf[FORMAT_TIMESTAMP_MAX];
1940
1941         assert(s);
1942         assert(f);
1943
1944         if (!prefix)
1945                 prefix = "";
1946
1947         if (s->pid <= 0)
1948                 return;
1949
1950         fprintf(f,
1951                 "%sPID: %lu\n",
1952                 prefix, (unsigned long) s->pid);
1953
1954         if (s->start_timestamp.realtime > 0)
1955                 fprintf(f,
1956                         "%sStart Timestamp: %s\n",
1957                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1958
1959         if (s->exit_timestamp.realtime > 0)
1960                 fprintf(f,
1961                         "%sExit Timestamp: %s\n"
1962                         "%sExit Code: %s\n"
1963                         "%sExit Status: %i\n",
1964                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1965                         prefix, sigchld_code_to_string(s->code),
1966                         prefix, s->status);
1967 }
1968
1969 char *exec_command_line(char **argv) {
1970         size_t k;
1971         char *n, *p, **a;
1972         bool first = true;
1973
1974         assert(argv);
1975
1976         k = 1;
1977         STRV_FOREACH(a, argv)
1978                 k += strlen(*a)+3;
1979
1980         if (!(n = new(char, k)))
1981                 return NULL;
1982
1983         p = n;
1984         STRV_FOREACH(a, argv) {
1985
1986                 if (!first)
1987                         *(p++) = ' ';
1988                 else
1989                         first = false;
1990
1991                 if (strpbrk(*a, WHITESPACE)) {
1992                         *(p++) = '\'';
1993                         p = stpcpy(p, *a);
1994                         *(p++) = '\'';
1995                 } else
1996                         p = stpcpy(p, *a);
1997
1998         }
1999
2000         *p = 0;
2001
2002         /* FIXME: this doesn't really handle arguments that have
2003          * spaces and ticks in them */
2004
2005         return n;
2006 }
2007
2008 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2009         char *p2;
2010         const char *prefix2;
2011
2012         char *cmd;
2013
2014         assert(c);
2015         assert(f);
2016
2017         if (!prefix)
2018                 prefix = "";
2019         p2 = strappend(prefix, "\t");
2020         prefix2 = p2 ? p2 : prefix;
2021
2022         cmd = exec_command_line(c->argv);
2023
2024         fprintf(f,
2025                 "%sCommand Line: %s\n",
2026                 prefix, cmd ? cmd : strerror(ENOMEM));
2027
2028         free(cmd);
2029
2030         exec_status_dump(&c->exec_status, f, prefix2);
2031
2032         free(p2);
2033 }
2034
2035 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2036         assert(f);
2037
2038         if (!prefix)
2039                 prefix = "";
2040
2041         LIST_FOREACH(command, c, c)
2042                 exec_command_dump(c, f, prefix);
2043 }
2044
2045 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2046         ExecCommand *end;
2047
2048         assert(l);
2049         assert(e);
2050
2051         if (*l) {
2052                 /* It's kind of important, that we keep the order here */
2053                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2054                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2055         } else
2056               *l = e;
2057 }
2058
2059 int exec_command_set(ExecCommand *c, const char *path, ...) {
2060         va_list ap;
2061         char **l, *p;
2062
2063         assert(c);
2064         assert(path);
2065
2066         va_start(ap, path);
2067         l = strv_new_ap(path, ap);
2068         va_end(ap);
2069
2070         if (!l)
2071                 return -ENOMEM;
2072
2073         if (!(p = strdup(path))) {
2074                 strv_free(l);
2075                 return -ENOMEM;
2076         }
2077
2078         free(c->path);
2079         c->path = p;
2080
2081         strv_free(c->argv);
2082         c->argv = l;
2083
2084         return 0;
2085 }
2086
2087 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2088         [EXEC_INPUT_NULL] = "null",
2089         [EXEC_INPUT_TTY] = "tty",
2090         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2091         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2092         [EXEC_INPUT_SOCKET] = "socket"
2093 };
2094
2095 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2096
2097 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2098         [EXEC_OUTPUT_INHERIT] = "inherit",
2099         [EXEC_OUTPUT_NULL] = "null",
2100         [EXEC_OUTPUT_TTY] = "tty",
2101         [EXEC_OUTPUT_SYSLOG] = "syslog",
2102         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2103         [EXEC_OUTPUT_KMSG] = "kmsg",
2104         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2105         [EXEC_OUTPUT_JOURNAL] = "journal",
2106         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2107         [EXEC_OUTPUT_SOCKET] = "socket"
2108 };
2109
2110 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2111
2112 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2113         [KILL_CONTROL_GROUP] = "control-group",
2114         [KILL_PROCESS] = "process",
2115         [KILL_NONE] = "none"
2116 };
2117
2118 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2119
2120 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2121         [KILL_MAIN] = "main",
2122         [KILL_CONTROL] = "control",
2123         [KILL_ALL] = "all"
2124 };
2125
2126 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);