chiark / gitweb /
Remove unnecessary casts in printfs
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
/* Two timeouts expressed in microseconds (five seconds and one second).
 * NOTE(review): their users are outside the part of the file visible here. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals by chown_terminal().
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the logger socket. */
#define SNDBUF_SIZE (8 * 1024 * 1024)
98
/* Relocates the passed descriptors so that fds[i] ends up being descriptor
 * number i+3. The array is rewritten in place with the new descriptor
 * numbers. Returns 0 on success or a negative errno-style value if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! (sorts it) */

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free descriptor
                         * >= wanted. If the slot was still taken we got
                         * something else; remember the first such index
                         * so that another pass starts from there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
142
/* Applies the requested O_NONBLOCK state to every descriptor in the array
 * and clears FD_CLOEXEC on each of them. Stops at the first failure and
 * returns that helper's negative result; returns 0 otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these
                 * descriptors are meant to be passed on to our
                 * children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171         assert(context);
172
173         if (context->tty_path)
174                 return context->tty_path;
175
176         return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180         assert(context);
181
182         if (context->tty_vhangup)
183                 terminal_vhangup(tty_path(context));
184
185         if (context->tty_reset)
186                 reset_terminal(tty_path(context));
187
188         if (context->tty_vt_disallocate && context->tty_path)
189                 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193         return
194                 o == EXEC_OUTPUT_TTY ||
195                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative
 * errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted descriptor */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);

        return result;
}
217
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
219         int fd, r;
220         union sockaddr_union sa = {
221                 .un.sun_family = AF_UNIX,
222                 .un.sun_path = "/run/systemd/journal/stdout",
223         };
224
225         assert(context);
226         assert(output < _EXEC_OUTPUT_MAX);
227         assert(ident);
228         assert(nfd >= 0);
229
230         fd = socket(AF_UNIX, SOCK_STREAM, 0);
231         if (fd < 0)
232                 return -errno;
233
234         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
235         if (r < 0) {
236                 safe_close(fd);
237                 return -errno;
238         }
239
240         if (shutdown(fd, SHUT_RD) < 0) {
241                 safe_close(fd);
242                 return -errno;
243         }
244
245         fd_inc_sndbuf(fd, SNDBUF_SIZE);
246
247         dprintf(fd,
248                 "%s\n"
249                 "%s\n"
250                 "%i\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n",
255                 context->syslog_identifier ? context->syslog_identifier : ident,
256                 unit_id,
257                 context->syslog_priority,
258                 !!context->syslog_level_prefix,
259                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261                 is_terminal_output(output));
262
263         if (fd != nfd) {
264                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265                 safe_close(fd);
266         } else
267                 r = nfd;
268
269         return r;
270 }
/* Opens the terminal at path via open_terminal(), passing mode with
 * O_NOCTTY added, and makes it available as descriptor nfd. Returns nfd on
 * success, a negative value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already at the wanted descriptor number? */
        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(term_fd);

        return result;
}
288
289 static bool is_terminal_input(ExecInput i) {
290         return
291                 i == EXEC_INPUT_TTY ||
292                 i == EXEC_INPUT_TTY_FORCE ||
293                 i == EXEC_INPUT_TTY_FAIL;
294 }
295
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
297
298         if (is_terminal_input(std_input) && !apply_tty_stdin)
299                 return EXEC_INPUT_NULL;
300
301         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302                 return EXEC_INPUT_NULL;
303
304         return std_input;
305 }
306
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
308
309         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310                 return EXEC_OUTPUT_INHERIT;
311
312         return std_output;
313 }
314
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
316         ExecInput i;
317
318         assert(context);
319
320         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
321
322         switch (i) {
323
324         case EXEC_INPUT_NULL:
325                 return open_null_as(O_RDONLY, STDIN_FILENO);
326
327         case EXEC_INPUT_TTY:
328         case EXEC_INPUT_TTY_FORCE:
329         case EXEC_INPUT_TTY_FAIL: {
330                 int fd, r;
331
332                 fd = acquire_terminal(tty_path(context),
333                                       i == EXEC_INPUT_TTY_FAIL,
334                                       i == EXEC_INPUT_TTY_FORCE,
335                                       false,
336                                       (usec_t) -1);
337                 if (fd < 0)
338                         return fd;
339
340                 if (fd != STDIN_FILENO) {
341                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342                         safe_close(fd);
343                 } else
344                         r = STDIN_FILENO;
345
346                 return r;
347         }
348
349         case EXEC_INPUT_SOCKET:
350                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
351
352         default:
353                 assert_not_reached("Unknown input type");
354         }
355 }
356
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
358         ExecOutput o;
359         ExecInput i;
360         int r;
361
362         assert(context);
363         assert(ident);
364
365         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366         o = fixup_output(context->std_output, socket_fd);
367
368         if (fileno == STDERR_FILENO) {
369                 ExecOutput e;
370                 e = fixup_output(context->std_error, socket_fd);
371
372                 /* This expects the input and output are already set up */
373
374                 /* Don't change the stderr file descriptor if we inherit all
375                  * the way and are not on a tty */
376                 if (e == EXEC_OUTPUT_INHERIT &&
377                     o == EXEC_OUTPUT_INHERIT &&
378                     i == EXEC_INPUT_NULL &&
379                     !is_terminal_input(context->std_input) &&
380                     getppid () != 1)
381                         return fileno;
382
383                 /* Duplicate from stdout if possible */
384                 if (e == o || e == EXEC_OUTPUT_INHERIT)
385                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 o = e;
388
389         } else if (o == EXEC_OUTPUT_INHERIT) {
390                 /* If input got downgraded, inherit the original value */
391                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395                 if (i != EXEC_INPUT_NULL)
396                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
397
398                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
399                 if (getppid() != 1)
400                         return fileno;
401
402                 /* We need to open /dev/null here anew, to get the right access mode. */
403                 return open_null_as(O_WRONLY, fileno);
404         }
405
406         switch (o) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 r = connect_logger_as(context, o, ident, unit_id, fileno);
425                 if (r < 0) {
426                         log_struct_unit(LOG_CRIT, unit_id,
427                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428                                 fileno == STDOUT_FILENO ? "out" : "err",
429                                 unit_id, strerror(-r),
430                                 "ERRNO=%d", -r,
431                                 NULL);
432                         r = open_null_as(O_WRONLY, fileno);
433                 }
434                 return r;
435
436         case EXEC_OUTPUT_SOCKET:
437                 assert(socket_fd >= 0);
438                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
439
440         default:
441                 assert_not_reached("Unknown error type");
442         }
443 }
444
445 static int chown_terminal(int fd, uid_t uid) {
446         struct stat st;
447
448         assert(fd >= 0);
449
450         /* This might fail. What matters are the results. */
451         (void) fchown(fd, uid, -1);
452         (void) fchmod(fd, TTY_MODE);
453
454         if (fstat(fd, &st) < 0)
455                 return -errno;
456
457         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
458                 return -EPERM;
459
460         return 0;
461 }
462
463 static int setup_confirm_stdio(int *_saved_stdin,
464                                int *_saved_stdout) {
465         int fd = -1, saved_stdin, saved_stdout = -1, r;
466
467         assert(_saved_stdin);
468         assert(_saved_stdout);
469
470         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
471         if (saved_stdin < 0)
472                 return -errno;
473
474         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475         if (saved_stdout < 0) {
476                 r = errno;
477                 goto fail;
478         }
479
480         fd = acquire_terminal(
481                         "/dev/console",
482                         false,
483                         false,
484                         false,
485                         DEFAULT_CONFIRM_USEC);
486         if (fd < 0) {
487                 r = fd;
488                 goto fail;
489         }
490
491         r = chown_terminal(fd, getuid());
492         if (r < 0)
493                 goto fail;
494
495         if (dup2(fd, STDIN_FILENO) < 0) {
496                 r = -errno;
497                 goto fail;
498         }
499
500         if (dup2(fd, STDOUT_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (fd >= 2)
506                 safe_close(fd);
507
508         *_saved_stdin = saved_stdin;
509         *_saved_stdout = saved_stdout;
510
511         return 0;
512
513 fail:
514         safe_close(saved_stdout);
515         safe_close(saved_stdin);
516         safe_close(fd);
517
518         return r;
519 }
520
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522         _cleanup_close_ int fd = -1;
523         va_list ap;
524
525         assert(format);
526
527         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
528         if (fd < 0)
529                 return fd;
530
531         va_start(ap, format);
532         vdprintf(fd, format, ap);
533         va_end(ap);
534
535         return 0;
536 }
537
/* Releases the terminal and puts the saved stdin/stdout descriptors back
 * in place, closing the saved copies afterwards. Both restores are always
 * attempted; the return value is 0 or the negative errno-style value of
 * the last restore that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
561
/* Asks on the console whether the command line given by argv shall be
 * executed. The answer character (one of "yns") is stored in *response by
 * ask(). stdin/stdout are redirected to the console for the duration of
 * the question and restored afterwards, also when building the command
 * line string fails.
 *
 * Returns the result of ask(), or a negative errno-style value if the
 * console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdin/stdout pointing at the console and
                 * don't leak the saved descriptors. */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
581
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583         bool keep_groups = false;
584         int r;
585
586         assert(context);
587
588         /* Lookup and set GID and supplementary group list. Here too
589          * we avoid NSS lookups for gid=0. */
590
591         if (context->group || username) {
592
593                 if (context->group) {
594                         const char *g = context->group;
595
596                         if ((r = get_group_creds(&g, &gid)) < 0)
597                                 return r;
598                 }
599
600                 /* First step, initialize groups from /etc/groups */
601                 if (username && gid != 0) {
602                         if (initgroups(username, gid) < 0)
603                                 return -errno;
604
605                         keep_groups = true;
606                 }
607
608                 /* Second step, set our gids */
609                 if (setresgid(gid, gid, gid) < 0)
610                         return -errno;
611         }
612
613         if (context->supplementary_groups) {
614                 int ngroups_max, k;
615                 gid_t *gids;
616                 char **i;
617
618                 /* Final step, initialize any manually set supplementary groups */
619                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620
621                 if (!(gids = new(gid_t, ngroups_max)))
622                         return -ENOMEM;
623
624                 if (keep_groups) {
625                         if ((k = getgroups(ngroups_max, gids)) < 0) {
626                                 free(gids);
627                                 return -errno;
628                         }
629                 } else
630                         k = 0;
631
632                 STRV_FOREACH(i, context->supplementary_groups) {
633                         const char *g;
634
635                         if (k >= ngroups_max) {
636                                 free(gids);
637                                 return -E2BIG;
638                         }
639
640                         g = *i;
641                         r = get_group_creds(&g, gids+k);
642                         if (r < 0) {
643                                 free(gids);
644                                 return r;
645                         }
646
647                         k++;
648                 }
649
650                 if (setgroups(k, gids) < 0) {
651                         free(gids);
652                         return -errno;
653                 }
654
655                 free(gids);
656         }
657
658         return 0;
659 }
660
661 static int enforce_user(const ExecContext *context, uid_t uid) {
662         assert(context);
663
664         /* Sets (but doesn't lookup) the uid and make sure we keep the
665          * capabilities while doing so. */
666
667         if (context->capabilities) {
668                 _cleanup_cap_free_ cap_t d = NULL;
669                 static const cap_value_t bits[] = {
670                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
671                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
672                 };
673
674                 /* First step: If we need to keep capabilities but
675                  * drop privileges we need to make sure we keep our
676                  * caps, while we drop privileges. */
677                 if (uid != 0) {
678                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
679
680                         if (prctl(PR_GET_SECUREBITS) != sb)
681                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682                                         return -errno;
683                 }
684
685                 /* Second step: set the capabilities. This will reduce
686                  * the capabilities to the minimum we need. */
687
688                 d = cap_dup(context->capabilities);
689                 if (!d)
690                         return -errno;
691
692                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
694                         return -errno;
695
696                 if (cap_set_proc(d) < 0)
697                         return -errno;
698         }
699
700         /* Third step: actually set the uids */
701         if (setresuid(uid, uid, uid) < 0)
702                 return -errno;
703
704         /* At this point we should have all necessary capabilities but
705            are otherwise a normal user. However, the caps might got
706            corrupted due to the setresuid() so we need clean them up
707            later. This is done outside of this call. */
708
709         return 0;
710 }
711
712 #ifdef HAVE_PAM
713
714 static int null_conv(
715                 int num_msg,
716                 const struct pam_message **msg,
717                 struct pam_response **resp,
718                 void *appdata_ptr) {
719
720         /* We don't support conversations */
721
722         return PAM_CONV_ERR;
723 }
724
725 static int setup_pam(
726                 const char *name,
727                 const char *user,
728                 uid_t uid,
729                 const char *tty,
730                 char ***pam_env,
731                 int fds[], unsigned n_fds) {
732
733         static const struct pam_conv conv = {
734                 .conv = null_conv,
735                 .appdata_ptr = NULL
736         };
737
738         pam_handle_t *handle = NULL;
739         sigset_t ss, old_ss;
740         int pam_code = PAM_SUCCESS;
741         int err;
742         char **e = NULL;
743         bool close_session = false;
744         pid_t pam_pid = 0, parent_pid;
745         int flags = 0;
746
747         assert(name);
748         assert(user);
749         assert(pam_env);
750
751         /* We set up PAM in the parent process, then fork. The child
752          * will then stay around until killed via PR_GET_PDEATHSIG or
753          * systemd via the cgroup logic. It will then remove the PAM
754          * session again. The parent process will exec() the actual
755          * daemon. We do things this way to ensure that the main PID
756          * of the daemon is the one we initially fork()ed. */
757
758         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
759                 flags |= PAM_SILENT;
760
761         pam_code = pam_start(name, user, &conv, &handle);
762         if (pam_code != PAM_SUCCESS) {
763                 handle = NULL;
764                 goto fail;
765         }
766
767         if (tty) {
768                 pam_code = pam_set_item(handle, PAM_TTY, tty);
769                 if (pam_code != PAM_SUCCESS)
770                         goto fail;
771         }
772
773         pam_code = pam_acct_mgmt(handle, flags);
774         if (pam_code != PAM_SUCCESS)
775                 goto fail;
776
777         pam_code = pam_open_session(handle, flags);
778         if (pam_code != PAM_SUCCESS)
779                 goto fail;
780
781         close_session = true;
782
783         e = pam_getenvlist(handle);
784         if (!e) {
785                 pam_code = PAM_BUF_ERR;
786                 goto fail;
787         }
788
789         /* Block SIGTERM, so that we know that it won't get lost in
790          * the child */
791         if (sigemptyset(&ss) < 0 ||
792             sigaddset(&ss, SIGTERM) < 0 ||
793             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
794                 goto fail;
795
796         parent_pid = getpid();
797
798         pam_pid = fork();
799         if (pam_pid < 0)
800                 goto fail;
801
802         if (pam_pid == 0) {
803                 int sig;
804                 int r = EXIT_PAM;
805
806                 /* The child's job is to reset the PAM session on
807                  * termination */
808
809                 /* This string must fit in 10 chars (i.e. the length
810                  * of "/sbin/init"), to look pretty in /bin/ps */
811                 rename_process("(sd-pam)");
812
813                 /* Make sure we don't keep open the passed fds in this
814                 child. We assume that otherwise only those fds are
815                 open here that have been opened by PAM. */
816                 close_many(fds, n_fds);
817
818                 /* Drop privileges - we don't need any to pam_close_session
819                  * and this will make PR_SET_PDEATHSIG work in most cases.
820                  * If this fails, ignore the error - but expect sd-pam threads
821                  * to fail to exit normally */
822                 if (setresuid(uid, uid, uid) < 0)
823                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
824
825                 /* Wait until our parent died. This will only work if
826                  * the above setresuid() succeeds, otherwise the kernel
827                  * will not allow unprivileged parents kill their privileged
828                  * children this way. We rely on the control groups kill logic
829                  * to do the rest for us. */
830                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
831                         goto child_finish;
832
833                 /* Check if our parent process might already have
834                  * died? */
835                 if (getppid() == parent_pid) {
836                         for (;;) {
837                                 if (sigwait(&ss, &sig) < 0) {
838                                         if (errno == EINTR)
839                                                 continue;
840
841                                         goto child_finish;
842                                 }
843
844                                 assert(sig == SIGTERM);
845                                 break;
846                         }
847                 }
848
849                 /* If our parent died we'll end the session */
850                 if (getppid() != parent_pid) {
851                         pam_code = pam_close_session(handle, flags);
852                         if (pam_code != PAM_SUCCESS)
853                                 goto child_finish;
854                 }
855
856                 r = 0;
857
858         child_finish:
859                 pam_end(handle, pam_code | flags);
860                 _exit(r);
861         }
862
863         /* If the child was forked off successfully it will do all the
864          * cleanups, so forget about the handle here. */
865         handle = NULL;
866
867         /* Unblock SIGTERM again in the parent */
868         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
869                 goto fail;
870
871         /* We close the log explicitly here, since the PAM modules
872          * might have opened it, but we don't want this fd around. */
873         closelog();
874
875         *pam_env = e;
876         e = NULL;
877
878         return 0;
879
880 fail:
881         if (pam_code != PAM_SUCCESS) {
882                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
883                 err = -EPERM;  /* PAM errors do not map to errno */
884         } else {
885                 log_error("PAM failed: %m");
886                 err = -errno;
887         }
888
889         if (handle) {
890                 if (close_session)
891                         pam_code = pam_close_session(handle, flags);
892
893                 pam_end(handle, pam_code | flags);
894         }
895
896         strv_free(e);
897
898         closelog();
899
900         if (pam_pid > 1) {
901                 kill(pam_pid, SIGTERM);
902                 kill(pam_pid, SIGCONT);
903         }
904
905         return err;
906 }
907 #endif
908
/* Renames the process to "(<name>)", where <name> is the last path
 * component of path, shortened to its final 8 characters if it is longer.
 * Falls back to "(...)" if the path has no usable last component. */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps: two parentheses plus
         * at most 8 characters, plus the terminating NUL. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the end of the name is usually more
                 * telling, since the beginning might just be
                 * "systemd-" */
                base += len - 8;
                len = 8;
        }

        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = '\0';

        rename_process(name);
}
939
940 #ifdef HAVE_SECCOMP
941
942 static int apply_seccomp(ExecContext *c) {
943         uint32_t negative_action, action;
944         scmp_filter_ctx *seccomp;
945         Iterator i;
946         void *id;
947         int r;
948
949         assert(c);
950
951         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
952
953         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
954         if (!seccomp)
955                 return -ENOMEM;
956
957         if (c->syscall_archs) {
958
959                 SET_FOREACH(id, c->syscall_archs, i) {
960                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
961                         if (r == -EEXIST)
962                                 continue;
963                         if (r < 0)
964                                 goto finish;
965                 }
966
967         } else {
968                 r = seccomp_add_secondary_archs(seccomp);
969                 if (r < 0)
970                         goto finish;
971         }
972
973         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
974         SET_FOREACH(id, c->syscall_filter, i) {
975                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
976                 if (r < 0)
977                         goto finish;
978         }
979
980         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
981         if (r < 0)
982                 goto finish;
983
984         r = seccomp_load(seccomp);
985
986 finish:
987         seccomp_release(seccomp);
988         return r;
989 }
990
991 static int apply_address_families(ExecContext *c) {
992         scmp_filter_ctx *seccomp;
993         Iterator i;
994         int r;
995
996         assert(c);
997
998         seccomp = seccomp_init(SCMP_ACT_ALLOW);
999         if (!seccomp)
1000                 return -ENOMEM;
1001
1002         r = seccomp_add_secondary_archs(seccomp);
1003         if (r < 0)
1004                 goto finish;
1005
1006         if (c->address_families_whitelist) {
1007                 int af, first = 0, last = 0;
1008                 void *afp;
1009
1010                 /* If this is a whitelist, we first block the address
1011                  * families that are out of range and then everything
1012                  * that is not in the set. First, we find the lowest
1013                  * and highest address family in the set. */
1014
1015                 SET_FOREACH(afp, c->address_families, i) {
1016                         af = PTR_TO_INT(afp);
1017
1018                         if (af <= 0 || af >= af_max())
1019                                 continue;
1020
1021                         if (first == 0 || af < first)
1022                                 first = af;
1023
1024                         if (last == 0 || af > last)
1025                                 last = af;
1026                 }
1027
1028                 assert((first == 0) == (last == 0));
1029
1030                 if (first == 0) {
1031
1032                         /* No entries in the valid range, block everything */
1033                         r = seccomp_rule_add(
1034                                         seccomp,
1035                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1036                                         SCMP_SYS(socket),
1037                                         0);
1038                         if (r < 0)
1039                                 goto finish;
1040
1041                 } else {
1042
1043                         /* Block everything below the first entry */
1044                         r = seccomp_rule_add(
1045                                         seccomp,
1046                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1047                                         SCMP_SYS(socket),
1048                                         1,
1049                                         SCMP_A0(SCMP_CMP_LT, first));
1050                         if (r < 0)
1051                                 goto finish;
1052
1053                         /* Block everything above the last entry */
1054                         r = seccomp_rule_add(
1055                                         seccomp,
1056                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1057                                         SCMP_SYS(socket),
1058                                         1,
1059                                         SCMP_A0(SCMP_CMP_GT, last));
1060                         if (r < 0)
1061                                 goto finish;
1062
1063                         /* Block everything between the first and last
1064                          * entry */
1065                         for (af = 1; af < af_max(); af++) {
1066
1067                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1068                                         continue;
1069
1070                                 r = seccomp_rule_add(
1071                                                 seccomp,
1072                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1073                                                 SCMP_SYS(socket),
1074                                                 1,
1075                                                 SCMP_A0(SCMP_CMP_EQ, af));
1076                                 if (r < 0)
1077                                         goto finish;
1078                         }
1079                 }
1080
1081         } else {
1082                 void *af;
1083
1084                 /* If this is a blacklist, then generate one rule for
1085                  * each address family that are then combined in OR
1086                  * checks. */
1087
1088                 SET_FOREACH(af, c->address_families, i) {
1089
1090                         r = seccomp_rule_add(
1091                                         seccomp,
1092                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1093                                         SCMP_SYS(socket),
1094                                         1,
1095                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1096                         if (r < 0)
1097                                 goto finish;
1098                 }
1099         }
1100
1101         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1102         if (r < 0)
1103                 goto finish;
1104
1105         r = seccomp_load(seccomp);
1106
1107 finish:
1108         seccomp_release(seccomp);
1109         return r;
1110 }
1111
1112 #endif
1113
1114 static void do_idle_pipe_dance(int idle_pipe[4]) {
1115         assert(idle_pipe);
1116
1117
1118         safe_close(idle_pipe[1]);
1119         safe_close(idle_pipe[2]);
1120
1121         if (idle_pipe[0] >= 0) {
1122                 int r;
1123
1124                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1125
1126                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1127                         /* Signal systemd that we are bored and want to continue. */
1128                         write(idle_pipe[3], "x", 1);
1129
1130                         /* Wait for systemd to react to the signal above. */
1131                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1132                 }
1133
1134                 safe_close(idle_pipe[0]);
1135
1136         }
1137
1138         safe_close(idle_pipe[3]);
1139 }
1140
1141 static int build_environment(
1142                 ExecContext *c,
1143                 unsigned n_fds,
1144                 usec_t watchdog_usec,
1145                 const char *home,
1146                 const char *username,
1147                 const char *shell,
1148                 char ***ret) {
1149
1150         _cleanup_strv_free_ char **our_env = NULL;
1151         unsigned n_env = 0;
1152         char *x;
1153
1154         assert(c);
1155         assert(ret);
1156
1157         our_env = new0(char*, 10);
1158         if (!our_env)
1159                 return -ENOMEM;
1160
1161         if (n_fds > 0) {
1162                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1163                         return -ENOMEM;
1164                 our_env[n_env++] = x;
1165
1166                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1167                         return -ENOMEM;
1168                 our_env[n_env++] = x;
1169         }
1170
1171         if (watchdog_usec > 0) {
1172                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1173                         return -ENOMEM;
1174                 our_env[n_env++] = x;
1175
1176                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1177                         return -ENOMEM;
1178                 our_env[n_env++] = x;
1179         }
1180
1181         if (home) {
1182                 x = strappend("HOME=", home);
1183                 if (!x)
1184                         return -ENOMEM;
1185                 our_env[n_env++] = x;
1186         }
1187
1188         if (username) {
1189                 x = strappend("LOGNAME=", username);
1190                 if (!x)
1191                         return -ENOMEM;
1192                 our_env[n_env++] = x;
1193
1194                 x = strappend("USER=", username);
1195                 if (!x)
1196                         return -ENOMEM;
1197                 our_env[n_env++] = x;
1198         }
1199
1200         if (shell) {
1201                 x = strappend("SHELL=", shell);
1202                 if (!x)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205         }
1206
1207         if (is_terminal_input(c->std_input) ||
1208             c->std_output == EXEC_OUTPUT_TTY ||
1209             c->std_error == EXEC_OUTPUT_TTY ||
1210             c->tty_path) {
1211
1212                 x = strdup(default_term_for_tty(tty_path(c)));
1213                 if (!x)
1214                         return -ENOMEM;
1215                 our_env[n_env++] = x;
1216         }
1217
1218         our_env[n_env++] = NULL;
1219         assert(n_env <= 10);
1220
1221         *ret = our_env;
1222         our_env = NULL;
1223
1224         return 0;
1225 }
1226
1227 int exec_spawn(ExecCommand *command,
1228                char **argv,
1229                ExecContext *context,
1230                int fds[], unsigned n_fds,
1231                char **environment,
1232                bool apply_permissions,
1233                bool apply_chroot,
1234                bool apply_tty_stdin,
1235                bool confirm_spawn,
1236                CGroupControllerMask cgroup_supported,
1237                const char *cgroup_path,
1238                const char *runtime_prefix,
1239                const char *unit_id,
1240                usec_t watchdog_usec,
1241                int idle_pipe[4],
1242                ExecRuntime *runtime,
1243                pid_t *ret) {
1244
1245         _cleanup_strv_free_ char **files_env = NULL;
1246         int socket_fd;
1247         char *line;
1248         pid_t pid;
1249         int r;
1250
1251         assert(command);
1252         assert(context);
1253         assert(ret);
1254         assert(fds || n_fds <= 0);
1255
1256         if (context->std_input == EXEC_INPUT_SOCKET ||
1257             context->std_output == EXEC_OUTPUT_SOCKET ||
1258             context->std_error == EXEC_OUTPUT_SOCKET) {
1259
1260                 if (n_fds != 1)
1261                         return -EINVAL;
1262
1263                 socket_fd = fds[0];
1264
1265                 fds = NULL;
1266                 n_fds = 0;
1267         } else
1268                 socket_fd = -1;
1269
1270         r = exec_context_load_environment(context, &files_env);
1271         if (r < 0) {
1272                 log_struct_unit(LOG_ERR,
1273                            unit_id,
1274                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1275                            "ERRNO=%d", -r,
1276                            NULL);
1277                 return r;
1278         }
1279
1280         if (!argv)
1281                 argv = command->argv;
1282
1283         line = exec_command_line(argv);
1284         if (!line)
1285                 return log_oom();
1286
1287         log_struct_unit(LOG_DEBUG,
1288                         unit_id,
1289                         "EXECUTABLE=%s", command->path,
1290                         "MESSAGE=About to execute: %s", line,
1291                         NULL);
1292         free(line);
1293
1294         pid = fork();
1295         if (pid < 0)
1296                 return -errno;
1297
1298         if (pid == 0) {
1299                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1300                 const char *username = NULL, *home = NULL, *shell = NULL;
1301                 unsigned n_dont_close = 0;
1302                 int dont_close[n_fds + 3];
1303                 uid_t uid = (uid_t) -1;
1304                 gid_t gid = (gid_t) -1;
1305                 sigset_t ss;
1306                 int i, err;
1307
1308                 /* child */
1309
1310                 rename_process_from_path(command->path);
1311
1312                 /* We reset exactly these signals, since they are the
1313                  * only ones we set to SIG_IGN in the main daemon. All
1314                  * others we leave untouched because we set them to
1315                  * SIG_DFL or a valid handler initially, both of which
1316                  * will be demoted to SIG_DFL. */
1317                 default_signals(SIGNALS_CRASH_HANDLER,
1318                                 SIGNALS_IGNORE, -1);
1319
1320                 if (context->ignore_sigpipe)
1321                         ignore_signals(SIGPIPE, -1);
1322
1323                 assert_se(sigemptyset(&ss) == 0);
1324                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1325                         err = -errno;
1326                         r = EXIT_SIGNAL_MASK;
1327                         goto fail_child;
1328                 }
1329
1330                 if (idle_pipe)
1331                         do_idle_pipe_dance(idle_pipe);
1332
1333                 /* Close sockets very early to make sure we don't
1334                  * block init reexecution because it cannot bind its
1335                  * sockets */
1336                 log_forget_fds();
1337
1338                 if (socket_fd >= 0)
1339                         dont_close[n_dont_close++] = socket_fd;
1340                 if (n_fds > 0) {
1341                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1342                         n_dont_close += n_fds;
1343                 }
1344                 if (runtime) {
1345                         if (runtime->netns_storage_socket[0] >= 0)
1346                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1347                         if (runtime->netns_storage_socket[1] >= 0)
1348                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1349                 }
1350
1351                 err = close_all_fds(dont_close, n_dont_close);
1352                 if (err < 0) {
1353                         r = EXIT_FDS;
1354                         goto fail_child;
1355                 }
1356
1357                 if (!context->same_pgrp)
1358                         if (setsid() < 0) {
1359                                 err = -errno;
1360                                 r = EXIT_SETSID;
1361                                 goto fail_child;
1362                         }
1363
1364                 exec_context_tty_reset(context);
1365
1366                 if (confirm_spawn) {
1367                         char response;
1368
1369                         err = ask_for_confirmation(&response, argv);
1370                         if (err == -ETIMEDOUT)
1371                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1372                         else if (err < 0)
1373                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1374                         else if (response == 's') {
1375                                 write_confirm_message("Skipping execution.\n");
1376                                 err = -ECANCELED;
1377                                 r = EXIT_CONFIRM;
1378                                 goto fail_child;
1379                         } else if (response == 'n') {
1380                                 write_confirm_message("Failing execution.\n");
1381                                 err = r = 0;
1382                                 goto fail_child;
1383                         }
1384                 }
1385
1386                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1387                  * must sure to drop O_NONBLOCK */
1388                 if (socket_fd >= 0)
1389                         fd_nonblock(socket_fd, false);
1390
1391                 err = setup_input(context, socket_fd, apply_tty_stdin);
1392                 if (err < 0) {
1393                         r = EXIT_STDIN;
1394                         goto fail_child;
1395                 }
1396
1397                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1398                 if (err < 0) {
1399                         r = EXIT_STDOUT;
1400                         goto fail_child;
1401                 }
1402
1403                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1404                 if (err < 0) {
1405                         r = EXIT_STDERR;
1406                         goto fail_child;
1407                 }
1408
1409                 if (cgroup_path) {
1410                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1411                         if (err < 0) {
1412                                 r = EXIT_CGROUP;
1413                                 goto fail_child;
1414                         }
1415                 }
1416
1417                 if (context->oom_score_adjust_set) {
1418                         char t[16];
1419
1420                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1421                         char_array_0(t);
1422
1423                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1424                                 err = -errno;
1425                                 r = EXIT_OOM_ADJUST;
1426                                 goto fail_child;
1427                         }
1428                 }
1429
1430                 if (context->nice_set)
1431                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1432                                 err = -errno;
1433                                 r = EXIT_NICE;
1434                                 goto fail_child;
1435                         }
1436
1437                 if (context->cpu_sched_set) {
1438                         struct sched_param param = {
1439                                 .sched_priority = context->cpu_sched_priority,
1440                         };
1441
1442                         r = sched_setscheduler(0,
1443                                                context->cpu_sched_policy |
1444                                                (context->cpu_sched_reset_on_fork ?
1445                                                 SCHED_RESET_ON_FORK : 0),
1446                                                &param);
1447                         if (r < 0) {
1448                                 err = -errno;
1449                                 r = EXIT_SETSCHEDULER;
1450                                 goto fail_child;
1451                         }
1452                 }
1453
1454                 if (context->cpuset)
1455                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1456                                 err = -errno;
1457                                 r = EXIT_CPUAFFINITY;
1458                                 goto fail_child;
1459                         }
1460
1461                 if (context->ioprio_set)
1462                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1463                                 err = -errno;
1464                                 r = EXIT_IOPRIO;
1465                                 goto fail_child;
1466                         }
1467
1468                 if (context->timer_slack_nsec != (nsec_t) -1)
1469                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1470                                 err = -errno;
1471                                 r = EXIT_TIMERSLACK;
1472                                 goto fail_child;
1473                         }
1474
1475                 if (context->personality != 0xffffffffUL)
1476                         if (personality(context->personality) < 0) {
1477                                 err = -errno;
1478                                 r = EXIT_PERSONALITY;
1479                                 goto fail_child;
1480                         }
1481
1482                 if (context->utmp_id)
1483                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1484
1485                 if (context->user) {
1486                         username = context->user;
1487                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1488                         if (err < 0) {
1489                                 r = EXIT_USER;
1490                                 goto fail_child;
1491                         }
1492
1493                         if (is_terminal_input(context->std_input)) {
1494                                 err = chown_terminal(STDIN_FILENO, uid);
1495                                 if (err < 0) {
1496                                         r = EXIT_STDIN;
1497                                         goto fail_child;
1498                                 }
1499                         }
1500                 }
1501
1502 #ifdef HAVE_PAM
1503                 if (cgroup_path && context->user && context->pam_name) {
1504                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1505                         if (err < 0) {
1506                                 r = EXIT_CGROUP;
1507                                 goto fail_child;
1508                         }
1509
1510
1511                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1512                         if (err < 0) {
1513                                 r = EXIT_CGROUP;
1514                                 goto fail_child;
1515                         }
1516                 }
1517 #endif
1518
1519                 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1520                         char **rt;
1521
1522                         STRV_FOREACH(rt, context->runtime_directory) {
1523                                 _cleanup_free_ char *p;
1524
1525                                 p = strjoin(runtime_prefix, "/", *rt, NULL);
1526                                 if (!p) {
1527                                         r = EXIT_RUNTIME_DIRECTORY;
1528                                         err = -ENOMEM;
1529                                         goto fail_child;
1530                                 }
1531
1532                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1533                                 if (err < 0) {
1534                                         r = EXIT_RUNTIME_DIRECTORY;
1535                                         goto fail_child;
1536                                 }
1537                         }
1538                 }
1539
1540                 if (apply_permissions) {
1541                         err = enforce_groups(context, username, gid);
1542                         if (err < 0) {
1543                                 r = EXIT_GROUP;
1544                                 goto fail_child;
1545                         }
1546                 }
1547
1548                 umask(context->umask);
1549
1550 #ifdef HAVE_PAM
1551                 if (apply_permissions && context->pam_name && username) {
1552                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1553                         if (err < 0) {
1554                                 r = EXIT_PAM;
1555                                 goto fail_child;
1556                         }
1557                 }
1558 #endif
1559                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1560                         err = setup_netns(runtime->netns_storage_socket);
1561                         if (err < 0) {
1562                                 r = EXIT_NETWORK;
1563                                 goto fail_child;
1564                         }
1565                 }
1566
1567                 if (!strv_isempty(context->read_write_dirs) ||
1568                     !strv_isempty(context->read_only_dirs) ||
1569                     !strv_isempty(context->inaccessible_dirs) ||
1570                     context->mount_flags != 0 ||
1571                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1572                     context->private_devices) {
1573
1574                         char *tmp = NULL, *var = NULL;
1575
1576                         /* The runtime struct only contains the parent
1577                          * of the private /tmp, which is
1578                          * non-accessible to world users. Inside of it
1579                          * there's a /tmp that is sticky, and that's
1580                          * the one we want to use here. */
1581
1582                         if (context->private_tmp && runtime) {
1583                                 if (runtime->tmp_dir)
1584                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1585                                 if (runtime->var_tmp_dir)
1586                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1587                         }
1588
1589                         err = setup_namespace(
1590                                         context->read_write_dirs,
1591                                         context->read_only_dirs,
1592                                         context->inaccessible_dirs,
1593                                         tmp,
1594                                         var,
1595                                         context->private_devices,
1596                                         context->mount_flags);
1597
1598                         if (err < 0) {
1599                                 r = EXIT_NAMESPACE;
1600                                 goto fail_child;
1601                         }
1602                 }
1603
1604                 if (apply_chroot) {
1605                         if (context->root_directory)
1606                                 if (chroot(context->root_directory) < 0) {
1607                                         err = -errno;
1608                                         r = EXIT_CHROOT;
1609                                         goto fail_child;
1610                                 }
1611
1612                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1613                                 err = -errno;
1614                                 r = EXIT_CHDIR;
1615                                 goto fail_child;
1616                         }
1617                 } else {
1618                         _cleanup_free_ char *d = NULL;
1619
1620                         if (asprintf(&d, "%s/%s",
1621                                      context->root_directory ? context->root_directory : "",
1622                                      context->working_directory ? context->working_directory : "") < 0) {
1623                                 err = -ENOMEM;
1624                                 r = EXIT_MEMORY;
1625                                 goto fail_child;
1626                         }
1627
1628                         if (chdir(d) < 0) {
1629                                 err = -errno;
1630                                 r = EXIT_CHDIR;
1631                                 goto fail_child;
1632                         }
1633                 }
1634
1635                 /* We repeat the fd closing here, to make sure that
1636                  * nothing is leaked from the PAM modules */
1637                 err = close_all_fds(fds, n_fds);
1638                 if (err >= 0)
1639                         err = shift_fds(fds, n_fds);
1640                 if (err >= 0)
1641                         err = flags_fds(fds, n_fds, context->non_blocking);
1642                 if (err < 0) {
1643                         r = EXIT_FDS;
1644                         goto fail_child;
1645                 }
1646
1647                 if (apply_permissions) {
1648
1649                         for (i = 0; i < _RLIMIT_MAX; i++) {
1650                                 if (!context->rlimit[i])
1651                                         continue;
1652
1653                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1654                                         err = -errno;
1655                                         r = EXIT_LIMITS;
1656                                         goto fail_child;
1657                                 }
1658                         }
1659
1660                         if (context->capability_bounding_set_drop) {
1661                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1662                                 if (err < 0) {
1663                                         r = EXIT_CAPABILITIES;
1664                                         goto fail_child;
1665                                 }
1666                         }
1667
1668                         if (context->user) {
1669                                 err = enforce_user(context, uid);
1670                                 if (err < 0) {
1671                                         r = EXIT_USER;
1672                                         goto fail_child;
1673                                 }
1674                         }
1675
1676                         /* PR_GET_SECUREBITS is not privileged, while
1677                          * PR_SET_SECUREBITS is. So to suppress
1678                          * potential EPERMs we'll try not to call
1679                          * PR_SET_SECUREBITS unless necessary. */
1680                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1681                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1682                                         err = -errno;
1683                                         r = EXIT_SECUREBITS;
1684                                         goto fail_child;
1685                                 }
1686
1687                         if (context->capabilities)
1688                                 if (cap_set_proc(context->capabilities) < 0) {
1689                                         err = -errno;
1690                                         r = EXIT_CAPABILITIES;
1691                                         goto fail_child;
1692                                 }
1693
1694                         if (context->no_new_privileges)
1695                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1696                                         err = -errno;
1697                                         r = EXIT_NO_NEW_PRIVILEGES;
1698                                         goto fail_child;
1699                                 }
1700
1701 #ifdef HAVE_SECCOMP
1702                         if (context->address_families_whitelist ||
1703                             !set_isempty(context->address_families)) {
1704                                 err = apply_address_families(context);
1705                                 if (err < 0) {
1706                                         r = EXIT_ADDRESS_FAMILIES;
1707                                         goto fail_child;
1708                                 }
1709                         }
1710
1711                         if (context->syscall_whitelist ||
1712                             !set_isempty(context->syscall_filter) ||
1713                             !set_isempty(context->syscall_archs)) {
1714                                 err = apply_seccomp(context);
1715                                 if (err < 0) {
1716                                         r = EXIT_SECCOMP;
1717                                         goto fail_child;
1718                                 }
1719                         }
1720 #endif
1721
1722 #ifdef HAVE_SELINUX
1723                         if (context->selinux_context && use_selinux()) {
1724                                 err = setexeccon(context->selinux_context);
1725                                 if (err < 0 && !context->selinux_context_ignore) {
1726                                         r = EXIT_SELINUX_CONTEXT;
1727                                         goto fail_child;
1728                                 }
1729                         }
1730 #endif
1731
1732 #ifdef HAVE_APPARMOR
1733                         if (context->apparmor_profile && use_apparmor()) {
1734                                 err = aa_change_onexec(context->apparmor_profile);
1735                                 if (err < 0 && !context->apparmor_profile_ignore) {
1736                                         r = EXIT_APPARMOR_PROFILE;
1737                                         goto fail_child;
1738                                 }
1739                         }
1740 #endif
1741                 }
1742
1743                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1744                 if (r < 0) {
1745                         r = EXIT_MEMORY;
1746                         goto fail_child;
1747                 }
1748
1749                 final_env = strv_env_merge(5,
1750                                            environment,
1751                                            our_env,
1752                                            context->environment,
1753                                            files_env,
1754                                            pam_env,
1755                                            NULL);
1756                 if (!final_env) {
1757                         err = -ENOMEM;
1758                         r = EXIT_MEMORY;
1759                         goto fail_child;
1760                 }
1761
1762                 final_argv = replace_env_argv(argv, final_env);
1763                 if (!final_argv) {
1764                         err = -ENOMEM;
1765                         r = EXIT_MEMORY;
1766                         goto fail_child;
1767                 }
1768
1769                 final_env = strv_env_clean(final_env);
1770
1771                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1772                         line = exec_command_line(final_argv);
1773                         if (line) {
1774                                 log_open();
1775                                 log_struct_unit(LOG_DEBUG,
1776                                                 unit_id,
1777                                                 "EXECUTABLE=%s", command->path,
1778                                                 "MESSAGE=Executing: %s", line,
1779                                                 NULL);
1780                                 log_close();
1781                                 free(line);
1782                                 line = NULL;
1783                         }
1784                 }
1785                 execve(command->path, final_argv, final_env);
1786                 err = -errno;
1787                 r = EXIT_EXEC;
1788
1789         fail_child:
1790                 if (r != 0) {
1791                         log_open();
1792                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1793                                    "EXECUTABLE=%s", command->path,
1794                                    "MESSAGE=Failed at step %s spawning %s: %s",
1795                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1796                                           command->path, strerror(-err),
1797                                    "ERRNO=%d", -err,
1798                                    NULL);
1799                         log_close();
1800                 }
1801
1802                 _exit(r);
1803         }
1804
1805         log_struct_unit(LOG_DEBUG,
1806                         unit_id,
1807                         "MESSAGE=Forked %s as "PID_FMT,
1808                         command->path, pid,
1809                         NULL);
1810
1811         /* We add the new process to the cgroup both in the child (so
1812          * that we can be sure that no user code is ever executed
1813          * outside of the cgroup) and in the parent (so that we can be
1814          * sure that when we kill the cgroup the process will be
1815          * killed too). */
1816         if (cgroup_path)
1817                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1818
1819         exec_status_start(&command->exec_status, pid);
1820
1821         *ret = pid;
1822         return 0;
1823 }
1824
1825 void exec_context_init(ExecContext *c) {
1826         assert(c);
1827
1828         c->umask = 0022;
1829         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1830         c->cpu_sched_policy = SCHED_OTHER;
1831         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1832         c->syslog_level_prefix = true;
1833         c->ignore_sigpipe = true;
1834         c->timer_slack_nsec = (nsec_t) -1;
1835         c->personality = 0xffffffffUL;
1836         c->runtime_directory_mode = 0755;
1837 }
1838
1839 void exec_context_done(ExecContext *c) {
1840         unsigned l;
1841
1842         assert(c);
1843
1844         strv_free(c->environment);
1845         c->environment = NULL;
1846
1847         strv_free(c->environment_files);
1848         c->environment_files = NULL;
1849
1850         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1851                 free(c->rlimit[l]);
1852                 c->rlimit[l] = NULL;
1853         }
1854
1855         free(c->working_directory);
1856         c->working_directory = NULL;
1857         free(c->root_directory);
1858         c->root_directory = NULL;
1859
1860         free(c->tty_path);
1861         c->tty_path = NULL;
1862
1863         free(c->syslog_identifier);
1864         c->syslog_identifier = NULL;
1865
1866         free(c->user);
1867         c->user = NULL;
1868
1869         free(c->group);
1870         c->group = NULL;
1871
1872         strv_free(c->supplementary_groups);
1873         c->supplementary_groups = NULL;
1874
1875         free(c->pam_name);
1876         c->pam_name = NULL;
1877
1878         if (c->capabilities) {
1879                 cap_free(c->capabilities);
1880                 c->capabilities = NULL;
1881         }
1882
1883         strv_free(c->read_only_dirs);
1884         c->read_only_dirs = NULL;
1885
1886         strv_free(c->read_write_dirs);
1887         c->read_write_dirs = NULL;
1888
1889         strv_free(c->inaccessible_dirs);
1890         c->inaccessible_dirs = NULL;
1891
1892         if (c->cpuset)
1893                 CPU_FREE(c->cpuset);
1894
1895         free(c->utmp_id);
1896         c->utmp_id = NULL;
1897
1898         free(c->selinux_context);
1899         c->selinux_context = NULL;
1900
1901         free(c->apparmor_profile);
1902         c->apparmor_profile = NULL;
1903
1904         set_free(c->syscall_filter);
1905         c->syscall_filter = NULL;
1906
1907         set_free(c->syscall_archs);
1908         c->syscall_archs = NULL;
1909
1910         set_free(c->address_families);
1911         c->address_families = NULL;
1912
1913         strv_free(c->runtime_directory);
1914         c->runtime_directory = NULL;
1915 }
1916
1917 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1918         char **i;
1919
1920         assert(c);
1921
1922         if (!runtime_prefix)
1923                 return 0;
1924
1925         STRV_FOREACH(i, c->runtime_directory) {
1926                 _cleanup_free_ char *p;
1927
1928                 p = strjoin(runtime_prefix, "/", *i, NULL);
1929                 if (!p)
1930                         return -ENOMEM;
1931
1932                 /* We execute this synchronously, since we need to be
1933                  * sure this is gone when we start the service
1934                  * next. */
1935                 rm_rf_dangerous(p, false, true, false);
1936         }
1937
1938         return 0;
1939 }
1940
1941 void exec_command_done(ExecCommand *c) {
1942         assert(c);
1943
1944         free(c->path);
1945         c->path = NULL;
1946
1947         strv_free(c->argv);
1948         c->argv = NULL;
1949 }
1950
1951 void exec_command_done_array(ExecCommand *c, unsigned n) {
1952         unsigned i;
1953
1954         for (i = 0; i < n; i++)
1955                 exec_command_done(c+i);
1956 }
1957
1958 void exec_command_free_list(ExecCommand *c) {
1959         ExecCommand *i;
1960
1961         while ((i = c)) {
1962                 LIST_REMOVE(command, c, i);
1963                 exec_command_done(i);
1964                 free(i);
1965         }
1966 }
1967
1968 void exec_command_free_array(ExecCommand **c, unsigned n) {
1969         unsigned i;
1970
1971         for (i = 0; i < n; i++) {
1972                 exec_command_free_list(c[i]);
1973                 c[i] = NULL;
1974         }
1975 }
1976
1977 int exec_context_load_environment(const ExecContext *c, char ***l) {
1978         char **i, **r = NULL;
1979
1980         assert(c);
1981         assert(l);
1982
1983         STRV_FOREACH(i, c->environment_files) {
1984                 char *fn;
1985                 int k;
1986                 bool ignore = false;
1987                 char **p;
1988                 _cleanup_globfree_ glob_t pglob = {};
1989                 int count, n;
1990
1991                 fn = *i;
1992
1993                 if (fn[0] == '-') {
1994                         ignore = true;
1995                         fn ++;
1996                 }
1997
1998                 if (!path_is_absolute(fn)) {
1999                         if (ignore)
2000                                 continue;
2001
2002                         strv_free(r);
2003                         return -EINVAL;
2004                 }
2005
2006                 /* Filename supports globbing, take all matching files */
2007                 errno = 0;
2008                 if (glob(fn, 0, NULL, &pglob) != 0) {
2009                         if (ignore)
2010                                 continue;
2011
2012                         strv_free(r);
2013                         return errno ? -errno : -EINVAL;
2014                 }
2015                 count = pglob.gl_pathc;
2016                 if (count == 0) {
2017                         if (ignore)
2018                                 continue;
2019
2020                         strv_free(r);
2021                         return -EINVAL;
2022                 }
2023                 for (n = 0; n < count; n++) {
2024                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2025                         if (k < 0) {
2026                                 if (ignore)
2027                                         continue;
2028
2029                                 strv_free(r);
2030                                 return k;
2031                         }
2032                         /* Log invalid environment variables with filename */
2033                         if (p)
2034                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2035
2036                         if (r == NULL)
2037                                 r = p;
2038                         else {
2039                                 char **m;
2040
2041                                 m = strv_env_merge(2, r, p);
2042                                 strv_free(r);
2043                                 strv_free(p);
2044                                 if (!m)
2045                                         return -ENOMEM;
2046
2047                                 r = m;
2048                         }
2049                 }
2050         }
2051
2052         *l = r;
2053
2054         return 0;
2055 }
2056
/* Checks whether the given TTY (with or without a leading "/dev/")
 * may be the device behind /dev/console. When the console cannot be
 * resolved the answer is a conservative "true". */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool may_match;

        /* Compare device names without the directory part */
        if (startswith(tty, "/dev/"))
                tty += sizeof("/dev/") - 1;

        /* Is it trivially the console itself? */
        if (streq(tty, "console"))
                return true;

        console = resolve_dev_console(&active);
        if (!console)
                /* Resolving failed, so assume that it may match */
                return true;

        if (streq(console, tty))
                may_match = true;
        else
                /* "tty0" stands for the active VC, which at times is
                 * the very same device */
                may_match = streq(console, "tty0") && tty_is_vc(tty);

        free(active);

        return may_match;
}
2079
2080 bool exec_context_may_touch_console(ExecContext *ec) {
2081         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2082                 is_terminal_input(ec->std_input) ||
2083                 is_terminal_output(ec->std_output) ||
2084                 is_terminal_output(ec->std_error)) &&
2085                tty_may_match_dev_console(tty_path(ec));
2086 }
2087
2088 static void strv_fprintf(FILE *f, char **l) {
2089         char **g;
2090
2091         assert(f);
2092
2093         STRV_FOREACH(g, l)
2094                 fprintf(f, " %s", *g);
2095 }
2096
2097 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2098         char **e;
2099         unsigned i;
2100
2101         assert(c);
2102         assert(f);
2103
2104         prefix = strempty(prefix);
2105
2106         fprintf(f,
2107                 "%sUMask: %04o\n"
2108                 "%sWorkingDirectory: %s\n"
2109                 "%sRootDirectory: %s\n"
2110                 "%sNonBlocking: %s\n"
2111                 "%sPrivateTmp: %s\n"
2112                 "%sPrivateNetwork: %s\n"
2113                 "%sPrivateDevices: %s\n"
2114                 "%sIgnoreSIGPIPE: %s\n",
2115                 prefix, c->umask,
2116                 prefix, c->working_directory ? c->working_directory : "/",
2117                 prefix, c->root_directory ? c->root_directory : "/",
2118                 prefix, yes_no(c->non_blocking),
2119                 prefix, yes_no(c->private_tmp),
2120                 prefix, yes_no(c->private_network),
2121                 prefix, yes_no(c->private_devices),
2122                 prefix, yes_no(c->ignore_sigpipe));
2123
2124         STRV_FOREACH(e, c->environment)
2125                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2126
2127         STRV_FOREACH(e, c->environment_files)
2128                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2129
2130         if (c->nice_set)
2131                 fprintf(f,
2132                         "%sNice: %i\n",
2133                         prefix, c->nice);
2134
2135         if (c->oom_score_adjust_set)
2136                 fprintf(f,
2137                         "%sOOMScoreAdjust: %i\n",
2138                         prefix, c->oom_score_adjust);
2139
2140         for (i = 0; i < RLIM_NLIMITS; i++)
2141                 if (c->rlimit[i])
2142                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2143                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2144
2145         if (c->ioprio_set) {
2146                 _cleanup_free_ char *class_str = NULL;
2147
2148                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2149                 fprintf(f,
2150                         "%sIOSchedulingClass: %s\n"
2151                         "%sIOPriority: %i\n",
2152                         prefix, strna(class_str),
2153                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2154         }
2155
2156         if (c->cpu_sched_set) {
2157                 _cleanup_free_ char *policy_str = NULL;
2158
2159                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2160                 fprintf(f,
2161                         "%sCPUSchedulingPolicy: %s\n"
2162                         "%sCPUSchedulingPriority: %i\n"
2163                         "%sCPUSchedulingResetOnFork: %s\n",
2164                         prefix, strna(policy_str),
2165                         prefix, c->cpu_sched_priority,
2166                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2167         }
2168
2169         if (c->cpuset) {
2170                 fprintf(f, "%sCPUAffinity:", prefix);
2171                 for (i = 0; i < c->cpuset_ncpus; i++)
2172                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2173                                 fprintf(f, " %u", i);
2174                 fputs("\n", f);
2175         }
2176
2177         if (c->timer_slack_nsec != (nsec_t) -1)
2178                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2179
2180         fprintf(f,
2181                 "%sStandardInput: %s\n"
2182                 "%sStandardOutput: %s\n"
2183                 "%sStandardError: %s\n",
2184                 prefix, exec_input_to_string(c->std_input),
2185                 prefix, exec_output_to_string(c->std_output),
2186                 prefix, exec_output_to_string(c->std_error));
2187
2188         if (c->tty_path)
2189                 fprintf(f,
2190                         "%sTTYPath: %s\n"
2191                         "%sTTYReset: %s\n"
2192                         "%sTTYVHangup: %s\n"
2193                         "%sTTYVTDisallocate: %s\n",
2194                         prefix, c->tty_path,
2195                         prefix, yes_no(c->tty_reset),
2196                         prefix, yes_no(c->tty_vhangup),
2197                         prefix, yes_no(c->tty_vt_disallocate));
2198
2199         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2200             c->std_output == EXEC_OUTPUT_KMSG ||
2201             c->std_output == EXEC_OUTPUT_JOURNAL ||
2202             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2203             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2204             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2205             c->std_error == EXEC_OUTPUT_SYSLOG ||
2206             c->std_error == EXEC_OUTPUT_KMSG ||
2207             c->std_error == EXEC_OUTPUT_JOURNAL ||
2208             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2209             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2210             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2211
2212                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2213
2214                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2215                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2216
2217                 fprintf(f,
2218                         "%sSyslogFacility: %s\n"
2219                         "%sSyslogLevel: %s\n",
2220                         prefix, strna(fac_str),
2221                         prefix, strna(lvl_str));
2222         }
2223
2224         if (c->capabilities) {
2225                 _cleanup_cap_free_charp_ char *t;
2226
2227                 t = cap_to_text(c->capabilities, NULL);
2228                 if (t)
2229                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2230         }
2231
2232         if (c->secure_bits)
2233                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2234                         prefix,
2235                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2236                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2237                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2238                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2239                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2240                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2241
2242         if (c->capability_bounding_set_drop) {
2243                 unsigned long l;
2244                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2245
2246                 for (l = 0; l <= cap_last_cap(); l++)
2247                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2248                                 _cleanup_cap_free_charp_ char *t;
2249
2250                                 t = cap_to_name(l);
2251                                 if (t)
2252                                         fprintf(f, " %s", t);
2253                         }
2254
2255                 fputs("\n", f);
2256         }
2257
2258         if (c->user)
2259                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2260         if (c->group)
2261                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2262
2263         if (strv_length(c->supplementary_groups) > 0) {
2264                 fprintf(f, "%sSupplementaryGroups:", prefix);
2265                 strv_fprintf(f, c->supplementary_groups);
2266                 fputs("\n", f);
2267         }
2268
2269         if (c->pam_name)
2270                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2271
2272         if (strv_length(c->read_write_dirs) > 0) {
2273                 fprintf(f, "%sReadWriteDirs:", prefix);
2274                 strv_fprintf(f, c->read_write_dirs);
2275                 fputs("\n", f);
2276         }
2277
2278         if (strv_length(c->read_only_dirs) > 0) {
2279                 fprintf(f, "%sReadOnlyDirs:", prefix);
2280                 strv_fprintf(f, c->read_only_dirs);
2281                 fputs("\n", f);
2282         }
2283
2284         if (strv_length(c->inaccessible_dirs) > 0) {
2285                 fprintf(f, "%sInaccessibleDirs:", prefix);
2286                 strv_fprintf(f, c->inaccessible_dirs);
2287                 fputs("\n", f);
2288         }
2289
2290         if (c->utmp_id)
2291                 fprintf(f,
2292                         "%sUtmpIdentifier: %s\n",
2293                         prefix, c->utmp_id);
2294
2295         if (c->selinux_context)
2296                 fprintf(f,
2297                         "%sSELinuxContext: %s%s\n",
2298                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2299
2300         if (c->personality != 0xffffffffUL)
2301                 fprintf(f,
2302                         "%sPersonality: %s\n",
2303                         prefix, strna(personality_to_string(c->personality)));
2304
2305         if (c->syscall_filter) {
2306 #ifdef HAVE_SECCOMP
2307                 Iterator j;
2308                 void *id;
2309                 bool first = true;
2310 #endif
2311
2312                 fprintf(f,
2313                         "%sSystemCallFilter: ",
2314                         prefix);
2315
2316                 if (!c->syscall_whitelist)
2317                         fputc('~', f);
2318
2319 #ifdef HAVE_SECCOMP
2320                 SET_FOREACH(id, c->syscall_filter, j) {
2321                         _cleanup_free_ char *name = NULL;
2322
2323                         if (first)
2324                                 first = false;
2325                         else
2326                                 fputc(' ', f);
2327
2328                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2329                         fputs(strna(name), f);
2330                 }
2331 #endif
2332
2333                 fputc('\n', f);
2334         }
2335
2336         if (c->syscall_archs) {
2337 #ifdef HAVE_SECCOMP
2338                 Iterator j;
2339                 void *id;
2340 #endif
2341
2342                 fprintf(f,
2343                         "%sSystemCallArchitectures:",
2344                         prefix);
2345
2346 #ifdef HAVE_SECCOMP
2347                 SET_FOREACH(id, c->syscall_archs, j)
2348                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2349 #endif
2350                 fputc('\n', f);
2351         }
2352
2353         if (c->syscall_errno != 0)
2354                 fprintf(f,
2355                         "%sSystemCallErrorNumber: %s\n",
2356                         prefix, strna(errno_to_name(c->syscall_errno)));
2357
2358         if (c->apparmor_profile)
2359                 fprintf(f,
2360                         "%sAppArmorProfile: %s%s\n",
2361                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2362 }
2363
2364 void exec_status_start(ExecStatus *s, pid_t pid) {
2365         assert(s);
2366
2367         zero(*s);
2368         s->pid = pid;
2369         dual_timestamp_get(&s->start_timestamp);
2370 }
2371
2372 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2373         assert(s);
2374
2375         if (s->pid && s->pid != pid)
2376                 zero(*s);
2377
2378         s->pid = pid;
2379         dual_timestamp_get(&s->exit_timestamp);
2380
2381         s->code = code;
2382         s->status = status;
2383
2384         if (context) {
2385                 if (context->utmp_id)
2386                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2387
2388                 exec_context_tty_reset(context);
2389         }
2390 }
2391
2392 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2393         char buf[FORMAT_TIMESTAMP_MAX];
2394
2395         assert(s);
2396         assert(f);
2397
2398         if (!prefix)
2399                 prefix = "";
2400
2401         if (s->pid <= 0)
2402                 return;
2403
2404         fprintf(f,
2405                 "%sPID: "PID_FMT"\n",
2406                 prefix, s->pid);
2407
2408         if (s->start_timestamp.realtime > 0)
2409                 fprintf(f,
2410                         "%sStart Timestamp: %s\n",
2411                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2412
2413         if (s->exit_timestamp.realtime > 0)
2414                 fprintf(f,
2415                         "%sExit Timestamp: %s\n"
2416                         "%sExit Code: %s\n"
2417                         "%sExit Status: %i\n",
2418                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2419                         prefix, sigchld_code_to_string(s->code),
2420                         prefix, s->status);
2421 }
2422
2423 char *exec_command_line(char **argv) {
2424         size_t k;
2425         char *n, *p, **a;
2426         bool first = true;
2427
2428         assert(argv);
2429
2430         k = 1;
2431         STRV_FOREACH(a, argv)
2432                 k += strlen(*a)+3;
2433
2434         if (!(n = new(char, k)))
2435                 return NULL;
2436
2437         p = n;
2438         STRV_FOREACH(a, argv) {
2439
2440                 if (!first)
2441                         *(p++) = ' ';
2442                 else
2443                         first = false;
2444
2445                 if (strpbrk(*a, WHITESPACE)) {
2446                         *(p++) = '\'';
2447                         p = stpcpy(p, *a);
2448                         *(p++) = '\'';
2449                 } else
2450                         p = stpcpy(p, *a);
2451
2452         }
2453
2454         *p = 0;
2455
2456         /* FIXME: this doesn't really handle arguments that have
2457          * spaces and ticks in them */
2458
2459         return n;
2460 }
2461
2462 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2463         char *p2;
2464         const char *prefix2;
2465
2466         char *cmd;
2467
2468         assert(c);
2469         assert(f);
2470
2471         if (!prefix)
2472                 prefix = "";
2473         p2 = strappend(prefix, "\t");
2474         prefix2 = p2 ? p2 : prefix;
2475
2476         cmd = exec_command_line(c->argv);
2477
2478         fprintf(f,
2479                 "%sCommand Line: %s\n",
2480                 prefix, cmd ? cmd : strerror(ENOMEM));
2481
2482         free(cmd);
2483
2484         exec_status_dump(&c->exec_status, f, prefix2);
2485
2486         free(p2);
2487 }
2488
2489 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2490         assert(f);
2491
2492         if (!prefix)
2493                 prefix = "";
2494
2495         LIST_FOREACH(command, c, c)
2496                 exec_command_dump(c, f, prefix);
2497 }
2498
2499 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2500         ExecCommand *end;
2501
2502         assert(l);
2503         assert(e);
2504
2505         if (*l) {
2506                 /* It's kind of important, that we keep the order here */
2507                 LIST_FIND_TAIL(command, *l, end);
2508                 LIST_INSERT_AFTER(command, *l, end, e);
2509         } else
2510               *l = e;
2511 }
2512
2513 int exec_command_set(ExecCommand *c, const char *path, ...) {
2514         va_list ap;
2515         char **l, *p;
2516
2517         assert(c);
2518         assert(path);
2519
2520         va_start(ap, path);
2521         l = strv_new_ap(path, ap);
2522         va_end(ap);
2523
2524         if (!l)
2525                 return -ENOMEM;
2526
2527         p = strdup(path);
2528         if (!p) {
2529                 strv_free(l);
2530                 return -ENOMEM;
2531         }
2532
2533         free(c->path);
2534         c->path = p;
2535
2536         strv_free(c->argv);
2537         c->argv = l;
2538
2539         return 0;
2540 }
2541
2542 static int exec_runtime_allocate(ExecRuntime **rt) {
2543
2544         if (*rt)
2545                 return 0;
2546
2547         *rt = new0(ExecRuntime, 1);
2548         if (!*rt)
2549                 return -ENOMEM;
2550
2551         (*rt)->n_ref = 1;
2552         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2553
2554         return 0;
2555 }
2556
2557 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2558         int r;
2559
2560         assert(rt);
2561         assert(c);
2562         assert(id);
2563
2564         if (*rt)
2565                 return 1;
2566
2567         if (!c->private_network && !c->private_tmp)
2568                 return 0;
2569
2570         r = exec_runtime_allocate(rt);
2571         if (r < 0)
2572                 return r;
2573
2574         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2575                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2576                         return -errno;
2577         }
2578
2579         if (c->private_tmp && !(*rt)->tmp_dir) {
2580                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2581                 if (r < 0)
2582                         return r;
2583         }
2584
2585         return 1;
2586 }
2587
2588 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2589         assert(r);
2590         assert(r->n_ref > 0);
2591
2592         r->n_ref++;
2593         return r;
2594 }
2595
2596 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2597
2598         if (!r)
2599                 return NULL;
2600
2601         assert(r->n_ref > 0);
2602
2603         r->n_ref--;
2604         if (r->n_ref <= 0) {
2605                 free(r->tmp_dir);
2606                 free(r->var_tmp_dir);
2607                 safe_close_pair(r->netns_storage_socket);
2608                 free(r);
2609         }
2610
2611         return NULL;
2612 }
2613
2614 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2615         assert(u);
2616         assert(f);
2617         assert(fds);
2618
2619         if (!rt)
2620                 return 0;
2621
2622         if (rt->tmp_dir)
2623                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2624
2625         if (rt->var_tmp_dir)
2626                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2627
2628         if (rt->netns_storage_socket[0] >= 0) {
2629                 int copy;
2630
2631                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2632                 if (copy < 0)
2633                         return copy;
2634
2635                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2636         }
2637
2638         if (rt->netns_storage_socket[1] >= 0) {
2639                 int copy;
2640
2641                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2642                 if (copy < 0)
2643                         return copy;
2644
2645                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2646         }
2647
2648         return 0;
2649 }
2650
2651 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2652         int r;
2653
2654         assert(rt);
2655         assert(key);
2656         assert(value);
2657
2658         if (streq(key, "tmp-dir")) {
2659                 char *copy;
2660
2661                 r = exec_runtime_allocate(rt);
2662                 if (r < 0)
2663                         return r;
2664
2665                 copy = strdup(value);
2666                 if (!copy)
2667                         return log_oom();
2668
2669                 free((*rt)->tmp_dir);
2670                 (*rt)->tmp_dir = copy;
2671
2672         } else if (streq(key, "var-tmp-dir")) {
2673                 char *copy;
2674
2675                 r = exec_runtime_allocate(rt);
2676                 if (r < 0)
2677                         return r;
2678
2679                 copy = strdup(value);
2680                 if (!copy)
2681                         return log_oom();
2682
2683                 free((*rt)->var_tmp_dir);
2684                 (*rt)->var_tmp_dir = copy;
2685
2686         } else if (streq(key, "netns-socket-0")) {
2687                 int fd;
2688
2689                 r = exec_runtime_allocate(rt);
2690                 if (r < 0)
2691                         return r;
2692
2693                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2694                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2695                 else {
2696                         safe_close((*rt)->netns_storage_socket[0]);
2697                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2698                 }
2699         } else if (streq(key, "netns-socket-1")) {
2700                 int fd;
2701
2702                 r = exec_runtime_allocate(rt);
2703                 if (r < 0)
2704                         return r;
2705
2706                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2707                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2708                 else {
2709                         safe_close((*rt)->netns_storage_socket[1]);
2710                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2711                 }
2712         } else
2713                 return 0;
2714
2715         return 1;
2716 }
2717
/* Thread entry point: p is a heap-allocated path string owned by this
 * thread. The path is passed to rm_rf_dangerous() and the string is freed
 * afterwards. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *doomed = p;

        rm_rf_dangerous(doomed, false, true, false);
        free(doomed);

        return NULL;
}
2724
2725 void exec_runtime_destroy(ExecRuntime *rt) {
2726         int r;
2727
2728         if (!rt)
2729                 return;
2730
2731         /* If there are multiple users of this, let's leave the stuff around */
2732         if (rt->n_ref > 1)
2733                 return;
2734
2735         if (rt->tmp_dir) {
2736                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2737
2738                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2739                 if (r < 0) {
2740                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2741                         free(rt->tmp_dir);
2742                 }
2743
2744                 rt->tmp_dir = NULL;
2745         }
2746
2747         if (rt->var_tmp_dir) {
2748                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2749
2750                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2751                 if (r < 0) {
2752                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2753                         free(rt->var_tmp_dir);
2754                 }
2755
2756                 rt->var_tmp_dir = NULL;
2757         }
2758
2759         safe_close_pair(rt->netns_storage_socket);
2760 }
2761
2762 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2763         [EXEC_INPUT_NULL] = "null",
2764         [EXEC_INPUT_TTY] = "tty",
2765         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2766         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2767         [EXEC_INPUT_SOCKET] = "socket"
2768 };
2769
2770 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2771
2772 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2773         [EXEC_OUTPUT_INHERIT] = "inherit",
2774         [EXEC_OUTPUT_NULL] = "null",
2775         [EXEC_OUTPUT_TTY] = "tty",
2776         [EXEC_OUTPUT_SYSLOG] = "syslog",
2777         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2778         [EXEC_OUTPUT_KMSG] = "kmsg",
2779         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2780         [EXEC_OUTPUT_JOURNAL] = "journal",
2781         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2782         [EXEC_OUTPUT_SOCKET] = "socket"
2783 };
2784
2785 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);