chiark / gitweb /
build-sys: prefer using ln --relative -s where appropriate
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "tcpwrap.h"
73 #include "exit-status.h"
74 #include "missing.h"
75 #include "utmp-wtmp.h"
76 #include "def.h"
77 #include "path-util.h"
78 #include "env-util.h"
79 #include "fileio.h"
80 #include "unit.h"
81 #include "async.h"
82 #include "selinux-util.h"
83 #include "errno-list.h"
84 #include "af-list.h"
85 #include "apparmor-util.h"
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
93
94 /* This assumes there is a 'tty' group */
95 #define TTY_MODE 0620
96
97 #define SNDBUF_SIZE (8*1024*1024)
98
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3. The fds array is rewritten in place with the new
 * descriptor numbers. Returns 0 on success or a negative errno if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* The slot we asked for was still occupied, so
                         * F_DUPFD handed us a higher number. Remember the
                         * first such index and make another pass from
                         * there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
142
/* Applies the requested O_NONBLOCK setting to each descriptor in fds and
 * clears FD_CLOEXEC on all of them. Returns 0 on success or the first
 * negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child we execute. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171         assert(context);
172
173         if (context->tty_path)
174                 return context->tty_path;
175
176         return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180         assert(context);
181
182         if (context->tty_vhangup)
183                 terminal_vhangup(tty_path(context));
184
185         if (context->tty_reset)
186                 reset_terminal(tty_path(context));
187
188         if (context->tty_vt_disallocate && context->tty_path)
189                 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193         return
194                 o == EXEC_OUTPUT_TTY ||
195                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Lucky case: the kernel already gave us the wanted number */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
217
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
219         int fd, r;
220         union sockaddr_union sa = {
221                 .un.sun_family = AF_UNIX,
222                 .un.sun_path = "/run/systemd/journal/stdout",
223         };
224
225         assert(context);
226         assert(output < _EXEC_OUTPUT_MAX);
227         assert(ident);
228         assert(nfd >= 0);
229
230         fd = socket(AF_UNIX, SOCK_STREAM, 0);
231         if (fd < 0)
232                 return -errno;
233
234         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
235         if (r < 0) {
236                 close_nointr_nofail(fd);
237                 return -errno;
238         }
239
240         if (shutdown(fd, SHUT_RD) < 0) {
241                 close_nointr_nofail(fd);
242                 return -errno;
243         }
244
245         fd_inc_sndbuf(fd, SNDBUF_SIZE);
246
247         dprintf(fd,
248                 "%s\n"
249                 "%s\n"
250                 "%i\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n",
255                 context->syslog_identifier ? context->syslog_identifier : ident,
256                 unit_id,
257                 context->syslog_priority,
258                 !!context->syslog_level_prefix,
259                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261                 is_terminal_output(output));
262
263         if (fd != nfd) {
264                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265                 close_nointr_nofail(fd);
266         } else
267                 r = nfd;
268
269         return r;
270 }
/* Opens the terminal at path with the given open mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, a negative
 * error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
288
289 static bool is_terminal_input(ExecInput i) {
290         return
291                 i == EXEC_INPUT_TTY ||
292                 i == EXEC_INPUT_TTY_FORCE ||
293                 i == EXEC_INPUT_TTY_FAIL;
294 }
295
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
297
298         if (is_terminal_input(std_input) && !apply_tty_stdin)
299                 return EXEC_INPUT_NULL;
300
301         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302                 return EXEC_INPUT_NULL;
303
304         return std_input;
305 }
306
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
308
309         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310                 return EXEC_OUTPUT_INHERIT;
311
312         return std_output;
313 }
314
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
316         ExecInput i;
317
318         assert(context);
319
320         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
321
322         switch (i) {
323
324         case EXEC_INPUT_NULL:
325                 return open_null_as(O_RDONLY, STDIN_FILENO);
326
327         case EXEC_INPUT_TTY:
328         case EXEC_INPUT_TTY_FORCE:
329         case EXEC_INPUT_TTY_FAIL: {
330                 int fd, r;
331
332                 fd = acquire_terminal(tty_path(context),
333                                       i == EXEC_INPUT_TTY_FAIL,
334                                       i == EXEC_INPUT_TTY_FORCE,
335                                       false,
336                                       (usec_t) -1);
337                 if (fd < 0)
338                         return fd;
339
340                 if (fd != STDIN_FILENO) {
341                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342                         close_nointr_nofail(fd);
343                 } else
344                         r = STDIN_FILENO;
345
346                 return r;
347         }
348
349         case EXEC_INPUT_SOCKET:
350                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
351
352         default:
353                 assert_not_reached("Unknown input type");
354         }
355 }
356
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
358         ExecOutput o;
359         ExecInput i;
360         int r;
361
362         assert(context);
363         assert(ident);
364
365         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366         o = fixup_output(context->std_output, socket_fd);
367
368         if (fileno == STDERR_FILENO) {
369                 ExecOutput e;
370                 e = fixup_output(context->std_error, socket_fd);
371
372                 /* This expects the input and output are already set up */
373
374                 /* Don't change the stderr file descriptor if we inherit all
375                  * the way and are not on a tty */
376                 if (e == EXEC_OUTPUT_INHERIT &&
377                     o == EXEC_OUTPUT_INHERIT &&
378                     i == EXEC_INPUT_NULL &&
379                     !is_terminal_input(context->std_input) &&
380                     getppid () != 1)
381                         return fileno;
382
383                 /* Duplicate from stdout if possible */
384                 if (e == o || e == EXEC_OUTPUT_INHERIT)
385                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 o = e;
388
389         } else if (o == EXEC_OUTPUT_INHERIT) {
390                 /* If input got downgraded, inherit the original value */
391                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395                 if (i != EXEC_INPUT_NULL)
396                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
397
398                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
399                 if (getppid() != 1)
400                         return fileno;
401
402                 /* We need to open /dev/null here anew, to get the right access mode. */
403                 return open_null_as(O_WRONLY, fileno);
404         }
405
406         switch (o) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 r = connect_logger_as(context, o, ident, unit_id, fileno);
425                 if (r < 0) {
426                         log_struct_unit(LOG_CRIT, unit_id,
427                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428                                 fileno == STDOUT_FILENO ? "out" : "err",
429                                 unit_id, strerror(-r),
430                                 "ERRNO=%d", -r,
431                                 NULL);
432                         r = open_null_as(O_WRONLY, fileno);
433                 }
434                 return r;
435
436         case EXEC_OUTPUT_SOCKET:
437                 assert(socket_fd >= 0);
438                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
439
440         default:
441                 assert_not_reached("Unknown error type");
442         }
443 }
444
445 static int chown_terminal(int fd, uid_t uid) {
446         struct stat st;
447
448         assert(fd >= 0);
449
450         /* This might fail. What matters are the results. */
451         (void) fchown(fd, uid, -1);
452         (void) fchmod(fd, TTY_MODE);
453
454         if (fstat(fd, &st) < 0)
455                 return -errno;
456
457         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
458                 return -EPERM;
459
460         return 0;
461 }
462
463 static int setup_confirm_stdio(int *_saved_stdin,
464                                int *_saved_stdout) {
465         int fd = -1, saved_stdin, saved_stdout = -1, r;
466
467         assert(_saved_stdin);
468         assert(_saved_stdout);
469
470         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
471         if (saved_stdin < 0)
472                 return -errno;
473
474         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475         if (saved_stdout < 0) {
476                 r = errno;
477                 goto fail;
478         }
479
480         fd = acquire_terminal(
481                         "/dev/console",
482                         false,
483                         false,
484                         false,
485                         DEFAULT_CONFIRM_USEC);
486         if (fd < 0) {
487                 r = fd;
488                 goto fail;
489         }
490
491         r = chown_terminal(fd, getuid());
492         if (r < 0)
493                 goto fail;
494
495         if (dup2(fd, STDIN_FILENO) < 0) {
496                 r = -errno;
497                 goto fail;
498         }
499
500         if (dup2(fd, STDOUT_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (fd >= 2)
506                 close_nointr_nofail(fd);
507
508         *_saved_stdin = saved_stdin;
509         *_saved_stdout = saved_stdout;
510
511         return 0;
512
513 fail:
514         if (saved_stdout >= 0)
515                 close_nointr_nofail(saved_stdout);
516
517         if (saved_stdin >= 0)
518                 close_nointr_nofail(saved_stdin);
519
520         if (fd >= 0)
521                 close_nointr_nofail(fd);
522
523         return r;
524 }
525
526 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
527         int fd;
528         va_list ap;
529
530         assert(format);
531
532         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
533         if (fd < 0)
534                 return fd;
535
536         va_start(ap, format);
537         vdprintf(fd, format, ap);
538         va_end(ap);
539
540         close_nointr_nofail(fd);
541
542         return 0;
543 }
544
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies. Returns 0 on success or the negative errno of the last dup2()
 * that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return r;
}
571
/* Asks on the console whether the command line given by argv shall be
 * executed, accepting the answers 'y', 'n' and 's'. The answer is stored
 * in *response. Returns the result of ask(), or a negative error if the
 * console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdin/stdout were already redirected above, so they
                 * have to be restored on this path as well. */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
591
592 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
593         bool keep_groups = false;
594         int r;
595
596         assert(context);
597
598         /* Lookup and set GID and supplementary group list. Here too
599          * we avoid NSS lookups for gid=0. */
600
601         if (context->group || username) {
602
603                 if (context->group) {
604                         const char *g = context->group;
605
606                         if ((r = get_group_creds(&g, &gid)) < 0)
607                                 return r;
608                 }
609
610                 /* First step, initialize groups from /etc/groups */
611                 if (username && gid != 0) {
612                         if (initgroups(username, gid) < 0)
613                                 return -errno;
614
615                         keep_groups = true;
616                 }
617
618                 /* Second step, set our gids */
619                 if (setresgid(gid, gid, gid) < 0)
620                         return -errno;
621         }
622
623         if (context->supplementary_groups) {
624                 int ngroups_max, k;
625                 gid_t *gids;
626                 char **i;
627
628                 /* Final step, initialize any manually set supplementary groups */
629                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
630
631                 if (!(gids = new(gid_t, ngroups_max)))
632                         return -ENOMEM;
633
634                 if (keep_groups) {
635                         if ((k = getgroups(ngroups_max, gids)) < 0) {
636                                 free(gids);
637                                 return -errno;
638                         }
639                 } else
640                         k = 0;
641
642                 STRV_FOREACH(i, context->supplementary_groups) {
643                         const char *g;
644
645                         if (k >= ngroups_max) {
646                                 free(gids);
647                                 return -E2BIG;
648                         }
649
650                         g = *i;
651                         r = get_group_creds(&g, gids+k);
652                         if (r < 0) {
653                                 free(gids);
654                                 return r;
655                         }
656
657                         k++;
658                 }
659
660                 if (setgroups(k, gids) < 0) {
661                         free(gids);
662                         return -errno;
663                 }
664
665                 free(gids);
666         }
667
668         return 0;
669 }
670
671 static int enforce_user(const ExecContext *context, uid_t uid) {
672         assert(context);
673
674         /* Sets (but doesn't lookup) the uid and make sure we keep the
675          * capabilities while doing so. */
676
677         if (context->capabilities) {
678                 _cleanup_cap_free_ cap_t d = NULL;
679                 static const cap_value_t bits[] = {
680                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
681                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
682                 };
683
684                 /* First step: If we need to keep capabilities but
685                  * drop privileges we need to make sure we keep our
686                  * caps, while we drop privileges. */
687                 if (uid != 0) {
688                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
689
690                         if (prctl(PR_GET_SECUREBITS) != sb)
691                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
692                                         return -errno;
693                 }
694
695                 /* Second step: set the capabilities. This will reduce
696                  * the capabilities to the minimum we need. */
697
698                 d = cap_dup(context->capabilities);
699                 if (!d)
700                         return -errno;
701
702                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
703                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
704                         return -errno;
705
706                 if (cap_set_proc(d) < 0)
707                         return -errno;
708         }
709
710         /* Third step: actually set the uids */
711         if (setresuid(uid, uid, uid) < 0)
712                 return -errno;
713
714         /* At this point we should have all necessary capabilities but
715            are otherwise a normal user. However, the caps might got
716            corrupted due to the setresuid() so we need clean them up
717            later. This is done outside of this call. */
718
719         return 0;
720 }
721
722 #ifdef HAVE_PAM
723
724 static int null_conv(
725                 int num_msg,
726                 const struct pam_message **msg,
727                 struct pam_response **resp,
728                 void *appdata_ptr) {
729
730         /* We don't support conversations */
731
732         return PAM_CONV_ERR;
733 }
734
735 static int setup_pam(
736                 const char *name,
737                 const char *user,
738                 uid_t uid,
739                 const char *tty,
740                 char ***pam_env,
741                 int fds[], unsigned n_fds) {
742
743         static const struct pam_conv conv = {
744                 .conv = null_conv,
745                 .appdata_ptr = NULL
746         };
747
748         pam_handle_t *handle = NULL;
749         sigset_t ss, old_ss;
750         int pam_code = PAM_SUCCESS;
751         int err;
752         char **e = NULL;
753         bool close_session = false;
754         pid_t pam_pid = 0, parent_pid;
755         int flags = 0;
756
757         assert(name);
758         assert(user);
759         assert(pam_env);
760
761         /* We set up PAM in the parent process, then fork. The child
762          * will then stay around until killed via PR_GET_PDEATHSIG or
763          * systemd via the cgroup logic. It will then remove the PAM
764          * session again. The parent process will exec() the actual
765          * daemon. We do things this way to ensure that the main PID
766          * of the daemon is the one we initially fork()ed. */
767
768         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
769                 flags |= PAM_SILENT;
770
771         pam_code = pam_start(name, user, &conv, &handle);
772         if (pam_code != PAM_SUCCESS) {
773                 handle = NULL;
774                 goto fail;
775         }
776
777         if (tty) {
778                 pam_code = pam_set_item(handle, PAM_TTY, tty);
779                 if (pam_code != PAM_SUCCESS)
780                         goto fail;
781         }
782
783         pam_code = pam_acct_mgmt(handle, flags);
784         if (pam_code != PAM_SUCCESS)
785                 goto fail;
786
787         pam_code = pam_open_session(handle, flags);
788         if (pam_code != PAM_SUCCESS)
789                 goto fail;
790
791         close_session = true;
792
793         e = pam_getenvlist(handle);
794         if (!e) {
795                 pam_code = PAM_BUF_ERR;
796                 goto fail;
797         }
798
799         /* Block SIGTERM, so that we know that it won't get lost in
800          * the child */
801         if (sigemptyset(&ss) < 0 ||
802             sigaddset(&ss, SIGTERM) < 0 ||
803             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
804                 goto fail;
805
806         parent_pid = getpid();
807
808         pam_pid = fork();
809         if (pam_pid < 0)
810                 goto fail;
811
812         if (pam_pid == 0) {
813                 int sig;
814                 int r = EXIT_PAM;
815
816                 /* The child's job is to reset the PAM session on
817                  * termination */
818
819                 /* This string must fit in 10 chars (i.e. the length
820                  * of "/sbin/init"), to look pretty in /bin/ps */
821                 rename_process("(sd-pam)");
822
823                 /* Make sure we don't keep open the passed fds in this
824                 child. We assume that otherwise only those fds are
825                 open here that have been opened by PAM. */
826                 close_many(fds, n_fds);
827
828                 /* Drop privileges - we don't need any to pam_close_session
829                  * and this will make PR_SET_PDEATHSIG work in most cases.
830                  * If this fails, ignore the error - but expect sd-pam threads
831                  * to fail to exit normally */
832                 if (setresuid(uid, uid, uid) < 0)
833                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
834
835                 /* Wait until our parent died. This will only work if
836                  * the above setresuid() succeeds, otherwise the kernel
837                  * will not allow unprivileged parents kill their privileged
838                  * children this way. We rely on the control groups kill logic
839                  * to do the rest for us. */
840                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
841                         goto child_finish;
842
843                 /* Check if our parent process might already have
844                  * died? */
845                 if (getppid() == parent_pid) {
846                         for (;;) {
847                                 if (sigwait(&ss, &sig) < 0) {
848                                         if (errno == EINTR)
849                                                 continue;
850
851                                         goto child_finish;
852                                 }
853
854                                 assert(sig == SIGTERM);
855                                 break;
856                         }
857                 }
858
859                 /* If our parent died we'll end the session */
860                 if (getppid() != parent_pid) {
861                         pam_code = pam_close_session(handle, flags);
862                         if (pam_code != PAM_SUCCESS)
863                                 goto child_finish;
864                 }
865
866                 r = 0;
867
868         child_finish:
869                 pam_end(handle, pam_code | flags);
870                 _exit(r);
871         }
872
873         /* If the child was forked off successfully it will do all the
874          * cleanups, so forget about the handle here. */
875         handle = NULL;
876
877         /* Unblock SIGTERM again in the parent */
878         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
879                 goto fail;
880
881         /* We close the log explicitly here, since the PAM modules
882          * might have opened it, but we don't want this fd around. */
883         closelog();
884
885         *pam_env = e;
886         e = NULL;
887
888         return 0;
889
890 fail:
891         if (pam_code != PAM_SUCCESS) {
892                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
893                 err = -EPERM;  /* PAM errors do not map to errno */
894         } else {
895                 log_error("PAM failed: %m");
896                 err = -errno;
897         }
898
899         if (handle) {
900                 if (close_session)
901                         pam_code = pam_close_session(handle, flags);
902
903                 pam_end(handle, pam_code | flags);
904         }
905
906         strv_free(e);
907
908         closelog();
909
910         if (pam_pid > 1) {
911                 kill(pam_pid, SIGTERM);
912                 kill(pam_pid, SIGCONT);
913         }
914
915         return err;
916 }
917 #endif
918
/* Renames the current process to "(<name>)", where <name> is the last
 * (at most 8) characters of the basename of path, or to "(...)" if the
 * basename is empty. The result never exceeds 10 characters, the length
 * of "/sbin/init", so that it looks pretty in /bin/ps. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: the head is often just a common prefix
                 * such as "systemd-" and hence less telling. */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
949
950 #ifdef HAVE_SECCOMP
951
952 static int apply_seccomp(ExecContext *c) {
953         uint32_t negative_action, action;
954         scmp_filter_ctx *seccomp;
955         Iterator i;
956         void *id;
957         int r;
958
959         assert(c);
960
961         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
962
963         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
964         if (!seccomp)
965                 return -ENOMEM;
966
967         if (c->syscall_archs) {
968
969                 SET_FOREACH(id, c->syscall_archs, i) {
970                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
971                         if (r == -EEXIST)
972                                 continue;
973                         if (r < 0)
974                                 goto finish;
975                 }
976
977         } else {
978                 r = seccomp_add_secondary_archs(seccomp);
979                 if (r < 0)
980                         goto finish;
981         }
982
983         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
984         SET_FOREACH(id, c->syscall_filter, i) {
985                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
986                 if (r < 0)
987                         goto finish;
988         }
989
990         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
991         if (r < 0)
992                 goto finish;
993
994         r = seccomp_load(seccomp);
995
996 finish:
997         seccomp_release(seccomp);
998         return r;
999 }
1000
1001 static int apply_address_families(ExecContext *c) {
1002         scmp_filter_ctx *seccomp;
1003         Iterator i;
1004         int r;
1005
1006         assert(c);
1007
1008         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1009         if (!seccomp)
1010                 return -ENOMEM;
1011
1012         r = seccomp_add_secondary_archs(seccomp);
1013         if (r < 0)
1014                 goto finish;
1015
1016         if (c->address_families_whitelist) {
1017                 int af, first = 0, last = 0;
1018                 void *afp;
1019
1020                 /* If this is a whitelist, we first block the address
1021                  * families that are out of range and then everything
1022                  * that is not in the set. First, we find the lowest
1023                  * and highest address family in the set. */
1024
1025                 SET_FOREACH(afp, c->address_families, i) {
1026                         af = PTR_TO_INT(afp);
1027
1028                         if (af <= 0 || af >= af_max())
1029                                 continue;
1030
1031                         if (first == 0 || af < first)
1032                                 first = af;
1033
1034                         if (last == 0 || af > last)
1035                                 last = af;
1036                 }
1037
1038                 assert((first == 0) == (last == 0));
1039
1040                 if (first == 0) {
1041
1042                         /* No entries in the valid range, block everything */
1043                         r = seccomp_rule_add(
1044                                         seccomp,
1045                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1046                                         SCMP_SYS(socket),
1047                                         0);
1048                         if (r < 0)
1049                                 goto finish;
1050
1051                 } else {
1052
1053                         /* Block everything below the first entry */
1054                         r = seccomp_rule_add(
1055                                         seccomp,
1056                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1057                                         SCMP_SYS(socket),
1058                                         1,
1059                                         SCMP_A0(SCMP_CMP_LT, first));
1060                         if (r < 0)
1061                                 goto finish;
1062
1063                         /* Block everything above the last entry */
1064                         r = seccomp_rule_add(
1065                                         seccomp,
1066                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1067                                         SCMP_SYS(socket),
1068                                         1,
1069                                         SCMP_A0(SCMP_CMP_GT, last));
1070                         if (r < 0)
1071                                 goto finish;
1072
1073                         /* Block everything between the first and last
1074                          * entry */
1075                         for (af = 1; af < af_max(); af++) {
1076
1077                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1078                                         continue;
1079
1080                                 r = seccomp_rule_add(
1081                                                 seccomp,
1082                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1083                                                 SCMP_SYS(socket),
1084                                                 1,
1085                                                 SCMP_A0(SCMP_CMP_EQ, af));
1086                                 if (r < 0)
1087                                         goto finish;
1088                         }
1089                 }
1090
1091         } else {
1092                 void *af;
1093
1094                 /* If this is a blacklist, then generate one rule for
1095                  * each address family that are then combined in OR
1096                  * checks. */
1097
1098                 SET_FOREACH(af, c->address_families, i) {
1099
1100                         r = seccomp_rule_add(
1101                                         seccomp,
1102                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1103                                         SCMP_SYS(socket),
1104                                         1,
1105                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1106                         if (r < 0)
1107                                 goto finish;
1108                 }
1109         }
1110
1111         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1112         if (r < 0)
1113                 goto finish;
1114
1115         r = seccomp_load(seccomp);
1116
1117 finish:
1118         seccomp_release(seccomp);
1119         return r;
1120 }
1121
1122 #endif
1123
1124 static void do_idle_pipe_dance(int idle_pipe[4]) {
1125         assert(idle_pipe);
1126
1127         if (idle_pipe[1] >= 0)
1128                 close_nointr_nofail(idle_pipe[1]);
1129         if (idle_pipe[2] >= 0)
1130                 close_nointr_nofail(idle_pipe[2]);
1131
1132         if (idle_pipe[0] >= 0) {
1133                 int r;
1134
1135                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1136
1137                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1138                         /* Signal systemd that we are bored and want to continue. */
1139                         write(idle_pipe[3], "x", 1);
1140
1141                         /* Wait for systemd to react to the signal above. */
1142                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1143                 }
1144
1145                 close_nointr_nofail(idle_pipe[0]);
1146
1147         }
1148
1149         if (idle_pipe[3] >= 0)
1150                 close_nointr_nofail(idle_pipe[3]);
1151 }
1152
1153 static int build_environment(
1154                 ExecContext *c,
1155                 unsigned n_fds,
1156                 usec_t watchdog_usec,
1157                 const char *home,
1158                 const char *username,
1159                 const char *shell,
1160                 char ***ret) {
1161
1162         _cleanup_strv_free_ char **our_env = NULL;
1163         unsigned n_env = 0;
1164         char *x;
1165
1166         assert(c);
1167         assert(ret);
1168
1169         our_env = new0(char*, 10);
1170         if (!our_env)
1171                 return -ENOMEM;
1172
1173         if (n_fds > 0) {
1174                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1175                         return -ENOMEM;
1176                 our_env[n_env++] = x;
1177
1178                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1179                         return -ENOMEM;
1180                 our_env[n_env++] = x;
1181         }
1182
1183         if (watchdog_usec > 0) {
1184                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1185                         return -ENOMEM;
1186                 our_env[n_env++] = x;
1187
1188                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1189                         return -ENOMEM;
1190                 our_env[n_env++] = x;
1191         }
1192
1193         if (home) {
1194                 x = strappend("HOME=", home);
1195                 if (!x)
1196                         return -ENOMEM;
1197                 our_env[n_env++] = x;
1198         }
1199
1200         if (username) {
1201                 x = strappend("LOGNAME=", username);
1202                 if (!x)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205
1206                 x = strappend("USER=", username);
1207                 if (!x)
1208                         return -ENOMEM;
1209                 our_env[n_env++] = x;
1210         }
1211
1212         if (shell) {
1213                 x = strappend("SHELL=", shell);
1214                 if (!x)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217         }
1218
1219         if (is_terminal_input(c->std_input) ||
1220             c->std_output == EXEC_OUTPUT_TTY ||
1221             c->std_error == EXEC_OUTPUT_TTY ||
1222             c->tty_path) {
1223
1224                 x = strdup(default_term_for_tty(tty_path(c)));
1225                 if (!x)
1226                         return -ENOMEM;
1227                 our_env[n_env++] = x;
1228         }
1229
1230         our_env[n_env++] = NULL;
1231         assert(n_env <= 10);
1232
1233         *ret = our_env;
1234         our_env = NULL;
1235
1236         return 0;
1237 }
1238
1239 int exec_spawn(ExecCommand *command,
1240                char **argv,
1241                ExecContext *context,
1242                int fds[], unsigned n_fds,
1243                char **environment,
1244                bool apply_permissions,
1245                bool apply_chroot,
1246                bool apply_tty_stdin,
1247                bool confirm_spawn,
1248                CGroupControllerMask cgroup_supported,
1249                const char *cgroup_path,
1250                const char *unit_id,
1251                usec_t watchdog_usec,
1252                int idle_pipe[4],
1253                ExecRuntime *runtime,
1254                pid_t *ret) {
1255
1256         _cleanup_strv_free_ char **files_env = NULL;
1257         int socket_fd;
1258         char *line;
1259         pid_t pid;
1260         int r;
1261
1262         assert(command);
1263         assert(context);
1264         assert(ret);
1265         assert(fds || n_fds <= 0);
1266
1267         if (context->std_input == EXEC_INPUT_SOCKET ||
1268             context->std_output == EXEC_OUTPUT_SOCKET ||
1269             context->std_error == EXEC_OUTPUT_SOCKET) {
1270
1271                 if (n_fds != 1)
1272                         return -EINVAL;
1273
1274                 socket_fd = fds[0];
1275
1276                 fds = NULL;
1277                 n_fds = 0;
1278         } else
1279                 socket_fd = -1;
1280
1281         r = exec_context_load_environment(context, &files_env);
1282         if (r < 0) {
1283                 log_struct_unit(LOG_ERR,
1284                            unit_id,
1285                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1286                            "ERRNO=%d", -r,
1287                            NULL);
1288                 return r;
1289         }
1290
1291         if (!argv)
1292                 argv = command->argv;
1293
1294         line = exec_command_line(argv);
1295         if (!line)
1296                 return log_oom();
1297
1298         log_struct_unit(LOG_DEBUG,
1299                         unit_id,
1300                         "EXECUTABLE=%s", command->path,
1301                         "MESSAGE=About to execute: %s", line,
1302                         NULL);
1303         free(line);
1304
1305         pid = fork();
1306         if (pid < 0)
1307                 return -errno;
1308
1309         if (pid == 0) {
1310                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1311                 const char *username = NULL, *home = NULL, *shell = NULL;
1312                 unsigned n_dont_close = 0;
1313                 int dont_close[n_fds + 3];
1314                 uid_t uid = (uid_t) -1;
1315                 gid_t gid = (gid_t) -1;
1316                 sigset_t ss;
1317                 int i, err;
1318
1319                 /* child */
1320
1321                 rename_process_from_path(command->path);
1322
1323                 /* We reset exactly these signals, since they are the
1324                  * only ones we set to SIG_IGN in the main daemon. All
1325                  * others we leave untouched because we set them to
1326                  * SIG_DFL or a valid handler initially, both of which
1327                  * will be demoted to SIG_DFL. */
1328                 default_signals(SIGNALS_CRASH_HANDLER,
1329                                 SIGNALS_IGNORE, -1);
1330
1331                 if (context->ignore_sigpipe)
1332                         ignore_signals(SIGPIPE, -1);
1333
1334                 assert_se(sigemptyset(&ss) == 0);
1335                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1336                         err = -errno;
1337                         r = EXIT_SIGNAL_MASK;
1338                         goto fail_child;
1339                 }
1340
1341                 if (idle_pipe)
1342                         do_idle_pipe_dance(idle_pipe);
1343
1344                 /* Close sockets very early to make sure we don't
1345                  * block init reexecution because it cannot bind its
1346                  * sockets */
1347                 log_forget_fds();
1348
1349                 if (socket_fd >= 0)
1350                         dont_close[n_dont_close++] = socket_fd;
1351                 if (n_fds > 0) {
1352                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1353                         n_dont_close += n_fds;
1354                 }
1355                 if (runtime) {
1356                         if (runtime->netns_storage_socket[0] >= 0)
1357                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1358                         if (runtime->netns_storage_socket[1] >= 0)
1359                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1360                 }
1361
1362                 err = close_all_fds(dont_close, n_dont_close);
1363                 if (err < 0) {
1364                         r = EXIT_FDS;
1365                         goto fail_child;
1366                 }
1367
1368                 if (!context->same_pgrp)
1369                         if (setsid() < 0) {
1370                                 err = -errno;
1371                                 r = EXIT_SETSID;
1372                                 goto fail_child;
1373                         }
1374
1375                 if (context->tcpwrap_name) {
1376                         if (socket_fd >= 0)
1377                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1378                                         err = -EACCES;
1379                                         r = EXIT_TCPWRAP;
1380                                         goto fail_child;
1381                                 }
1382
1383                         for (i = 0; i < (int) n_fds; i++) {
1384                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1385                                         err = -EACCES;
1386                                         r = EXIT_TCPWRAP;
1387                                         goto fail_child;
1388                                 }
1389                         }
1390                 }
1391
1392                 exec_context_tty_reset(context);
1393
1394                 if (confirm_spawn) {
1395                         char response;
1396
1397                         err = ask_for_confirmation(&response, argv);
1398                         if (err == -ETIMEDOUT)
1399                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1400                         else if (err < 0)
1401                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1402                         else if (response == 's') {
1403                                 write_confirm_message("Skipping execution.\n");
1404                                 err = -ECANCELED;
1405                                 r = EXIT_CONFIRM;
1406                                 goto fail_child;
1407                         } else if (response == 'n') {
1408                                 write_confirm_message("Failing execution.\n");
1409                                 err = r = 0;
1410                                 goto fail_child;
1411                         }
1412                 }
1413
1414                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1415                  * must sure to drop O_NONBLOCK */
1416                 if (socket_fd >= 0)
1417                         fd_nonblock(socket_fd, false);
1418
1419                 err = setup_input(context, socket_fd, apply_tty_stdin);
1420                 if (err < 0) {
1421                         r = EXIT_STDIN;
1422                         goto fail_child;
1423                 }
1424
1425                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1426                 if (err < 0) {
1427                         r = EXIT_STDOUT;
1428                         goto fail_child;
1429                 }
1430
1431                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1432                 if (err < 0) {
1433                         r = EXIT_STDERR;
1434                         goto fail_child;
1435                 }
1436
1437                 if (cgroup_path) {
1438                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1439                         if (err < 0) {
1440                                 r = EXIT_CGROUP;
1441                                 goto fail_child;
1442                         }
1443                 }
1444
1445                 if (context->oom_score_adjust_set) {
1446                         char t[16];
1447
1448                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1449                         char_array_0(t);
1450
1451                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1452                                 err = -errno;
1453                                 r = EXIT_OOM_ADJUST;
1454                                 goto fail_child;
1455                         }
1456                 }
1457
1458                 if (context->nice_set)
1459                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1460                                 err = -errno;
1461                                 r = EXIT_NICE;
1462                                 goto fail_child;
1463                         }
1464
1465                 if (context->cpu_sched_set) {
1466                         struct sched_param param = {
1467                                 .sched_priority = context->cpu_sched_priority,
1468                         };
1469
1470                         r = sched_setscheduler(0,
1471                                                context->cpu_sched_policy |
1472                                                (context->cpu_sched_reset_on_fork ?
1473                                                 SCHED_RESET_ON_FORK : 0),
1474                                                &param);
1475                         if (r < 0) {
1476                                 err = -errno;
1477                                 r = EXIT_SETSCHEDULER;
1478                                 goto fail_child;
1479                         }
1480                 }
1481
1482                 if (context->cpuset)
1483                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1484                                 err = -errno;
1485                                 r = EXIT_CPUAFFINITY;
1486                                 goto fail_child;
1487                         }
1488
1489                 if (context->ioprio_set)
1490                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1491                                 err = -errno;
1492                                 r = EXIT_IOPRIO;
1493                                 goto fail_child;
1494                         }
1495
1496                 if (context->timer_slack_nsec != (nsec_t) -1)
1497                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1498                                 err = -errno;
1499                                 r = EXIT_TIMERSLACK;
1500                                 goto fail_child;
1501                         }
1502
1503                 if (context->personality != 0xffffffffUL)
1504                         if (personality(context->personality) < 0) {
1505                                 err = -errno;
1506                                 r = EXIT_PERSONALITY;
1507                                 goto fail_child;
1508                         }
1509
1510                 if (context->utmp_id)
1511                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1512
1513                 if (context->user) {
1514                         username = context->user;
1515                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1516                         if (err < 0) {
1517                                 r = EXIT_USER;
1518                                 goto fail_child;
1519                         }
1520
1521                         if (is_terminal_input(context->std_input)) {
1522                                 err = chown_terminal(STDIN_FILENO, uid);
1523                                 if (err < 0) {
1524                                         r = EXIT_STDIN;
1525                                         goto fail_child;
1526                                 }
1527                         }
1528                 }
1529
1530 #ifdef HAVE_PAM
1531                 if (cgroup_path && context->user && context->pam_name) {
1532                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1533                         if (err < 0) {
1534                                 r = EXIT_CGROUP;
1535                                 goto fail_child;
1536                         }
1537
1538
1539                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1540                         if (err < 0) {
1541                                 r = EXIT_CGROUP;
1542                                 goto fail_child;
1543                         }
1544                 }
1545 #endif
1546
1547                 if (apply_permissions) {
1548                         err = enforce_groups(context, username, gid);
1549                         if (err < 0) {
1550                                 r = EXIT_GROUP;
1551                                 goto fail_child;
1552                         }
1553                 }
1554
1555                 umask(context->umask);
1556
1557 #ifdef HAVE_PAM
1558                 if (apply_permissions && context->pam_name && username) {
1559                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1560                         if (err < 0) {
1561                                 r = EXIT_PAM;
1562                                 goto fail_child;
1563                         }
1564                 }
1565 #endif
1566                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1567                         err = setup_netns(runtime->netns_storage_socket);
1568                         if (err < 0) {
1569                                 r = EXIT_NETWORK;
1570                                 goto fail_child;
1571                         }
1572                 }
1573
1574                 if (!strv_isempty(context->read_write_dirs) ||
1575                     !strv_isempty(context->read_only_dirs) ||
1576                     !strv_isempty(context->inaccessible_dirs) ||
1577                     context->mount_flags != 0 ||
1578                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1579                     context->private_devices) {
1580
1581                         char *tmp = NULL, *var = NULL;
1582
1583                         /* The runtime struct only contains the parent
1584                          * of the private /tmp, which is
1585                          * non-accessible to world users. Inside of it
1586                          * there's a /tmp that is sticky, and that's
1587                          * the one we want to use here. */
1588
1589                         if (context->private_tmp && runtime) {
1590                                 if (runtime->tmp_dir)
1591                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1592                                 if (runtime->var_tmp_dir)
1593                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1594                         }
1595
1596                         err = setup_namespace(
1597                                         context->read_write_dirs,
1598                                         context->read_only_dirs,
1599                                         context->inaccessible_dirs,
1600                                         tmp,
1601                                         var,
1602                                         context->private_devices,
1603                                         context->mount_flags);
1604
1605                         if (err < 0) {
1606                                 r = EXIT_NAMESPACE;
1607                                 goto fail_child;
1608                         }
1609                 }
1610
1611                 if (apply_chroot) {
1612                         if (context->root_directory)
1613                                 if (chroot(context->root_directory) < 0) {
1614                                         err = -errno;
1615                                         r = EXIT_CHROOT;
1616                                         goto fail_child;
1617                                 }
1618
1619                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1620                                 err = -errno;
1621                                 r = EXIT_CHDIR;
1622                                 goto fail_child;
1623                         }
1624                 } else {
1625                         _cleanup_free_ char *d = NULL;
1626
1627                         if (asprintf(&d, "%s/%s",
1628                                      context->root_directory ? context->root_directory : "",
1629                                      context->working_directory ? context->working_directory : "") < 0) {
1630                                 err = -ENOMEM;
1631                                 r = EXIT_MEMORY;
1632                                 goto fail_child;
1633                         }
1634
1635                         if (chdir(d) < 0) {
1636                                 err = -errno;
1637                                 r = EXIT_CHDIR;
1638                                 goto fail_child;
1639                         }
1640                 }
1641
1642                 /* We repeat the fd closing here, to make sure that
1643                  * nothing is leaked from the PAM modules */
1644                 err = close_all_fds(fds, n_fds);
1645                 if (err >= 0)
1646                         err = shift_fds(fds, n_fds);
1647                 if (err >= 0)
1648                         err = flags_fds(fds, n_fds, context->non_blocking);
1649                 if (err < 0) {
1650                         r = EXIT_FDS;
1651                         goto fail_child;
1652                 }
1653
1654                 if (apply_permissions) {
1655
1656                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1657                                 if (!context->rlimit[i])
1658                                         continue;
1659
1660                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1661                                         err = -errno;
1662                                         r = EXIT_LIMITS;
1663                                         goto fail_child;
1664                                 }
1665                         }
1666
1667                         if (context->capability_bounding_set_drop) {
1668                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1669                                 if (err < 0) {
1670                                         r = EXIT_CAPABILITIES;
1671                                         goto fail_child;
1672                                 }
1673                         }
1674
1675                         if (context->user) {
1676                                 err = enforce_user(context, uid);
1677                                 if (err < 0) {
1678                                         r = EXIT_USER;
1679                                         goto fail_child;
1680                                 }
1681                         }
1682
1683                         /* PR_GET_SECUREBITS is not privileged, while
1684                          * PR_SET_SECUREBITS is. So to suppress
1685                          * potential EPERMs we'll try not to call
1686                          * PR_SET_SECUREBITS unless necessary. */
1687                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1688                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1689                                         err = -errno;
1690                                         r = EXIT_SECUREBITS;
1691                                         goto fail_child;
1692                                 }
1693
1694                         if (context->capabilities)
1695                                 if (cap_set_proc(context->capabilities) < 0) {
1696                                         err = -errno;
1697                                         r = EXIT_CAPABILITIES;
1698                                         goto fail_child;
1699                                 }
1700
1701                         if (context->no_new_privileges)
1702                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1703                                         err = -errno;
1704                                         r = EXIT_NO_NEW_PRIVILEGES;
1705                                         goto fail_child;
1706                                 }
1707
1708 #ifdef HAVE_SECCOMP
1709                         if (context->address_families_whitelist ||
1710                             !set_isempty(context->address_families)) {
1711                                 err = apply_address_families(context);
1712                                 if (err < 0) {
1713                                         r = EXIT_ADDRESS_FAMILIES;
1714                                         goto fail_child;
1715                                 }
1716                         }
1717
1718                         if (context->syscall_whitelist ||
1719                             !set_isempty(context->syscall_filter) ||
1720                             !set_isempty(context->syscall_archs)) {
1721                                 err = apply_seccomp(context);
1722                                 if (err < 0) {
1723                                         r = EXIT_SECCOMP;
1724                                         goto fail_child;
1725                                 }
1726                         }
1727 #endif
1728
1729 #ifdef HAVE_SELINUX
1730                         if (context->selinux_context && use_selinux()) {
1731                                 err = setexeccon(context->selinux_context);
1732                                 if (err < 0 && !context->selinux_context_ignore) {
1733                                         r = EXIT_SELINUX_CONTEXT;
1734                                         goto fail_child;
1735                                 }
1736                         }
1737 #endif
1738
1739 #ifdef HAVE_APPARMOR
1740                         if (context->apparmor_profile && use_apparmor()) {
1741                                 err = aa_change_onexec(context->apparmor_profile);
1742                                 if (err < 0 && !context->apparmor_profile_ignore) {
1743                                         r = EXIT_APPARMOR_PROFILE;
1744                                         goto fail_child;
1745                                 }
1746                         }
1747 #endif
1748                 }
1749
1750                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1751                 if (r < 0) {
1752                         r = EXIT_MEMORY;
1753                         goto fail_child;
1754                 }
1755
1756                 final_env = strv_env_merge(5,
1757                                            environment,
1758                                            our_env,
1759                                            context->environment,
1760                                            files_env,
1761                                            pam_env,
1762                                            NULL);
1763                 if (!final_env) {
1764                         err = -ENOMEM;
1765                         r = EXIT_MEMORY;
1766                         goto fail_child;
1767                 }
1768
1769                 final_argv = replace_env_argv(argv, final_env);
1770                 if (!final_argv) {
1771                         err = -ENOMEM;
1772                         r = EXIT_MEMORY;
1773                         goto fail_child;
1774                 }
1775
1776                 final_env = strv_env_clean(final_env);
1777
1778                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1779                         line = exec_command_line(final_argv);
1780                         if (line) {
1781                                 log_open();
1782                                 log_struct_unit(LOG_DEBUG,
1783                                                 unit_id,
1784                                                 "EXECUTABLE=%s", command->path,
1785                                                 "MESSAGE=Executing: %s", line,
1786                                                 NULL);
1787                                 log_close();
1788                                 free(line);
1789                                 line = NULL;
1790                         }
1791                 }
1792                 execve(command->path, final_argv, final_env);
1793                 err = -errno;
1794                 r = EXIT_EXEC;
1795
1796         fail_child:
1797                 if (r != 0) {
1798                         log_open();
1799                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1800                                    "EXECUTABLE=%s", command->path,
1801                                    "MESSAGE=Failed at step %s spawning %s: %s",
1802                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1803                                           command->path, strerror(-err),
1804                                    "ERRNO=%d", -err,
1805                                    NULL);
1806                         log_close();
1807                 }
1808
1809                 _exit(r);
1810         }
1811
1812         log_struct_unit(LOG_DEBUG,
1813                         unit_id,
1814                         "MESSAGE=Forked %s as "PID_FMT,
1815                         command->path, pid,
1816                         NULL);
1817
1818         /* We add the new process to the cgroup both in the child (so
1819          * that we can be sure that no user code is ever executed
1820          * outside of the cgroup) and in the parent (so that we can be
1821          * sure that when we kill the cgroup the process will be
1822          * killed too). */
1823         if (cgroup_path)
1824                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1825
1826         exec_status_start(&command->exec_status, pid);
1827
1828         *ret = pid;
1829         return 0;
1830 }
1831
1832 void exec_context_init(ExecContext *c) {
1833         assert(c);
1834
1835         c->umask = 0022;
1836         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1837         c->cpu_sched_policy = SCHED_OTHER;
1838         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1839         c->syslog_level_prefix = true;
1840         c->ignore_sigpipe = true;
1841         c->timer_slack_nsec = (nsec_t) -1;
1842         c->personality = 0xffffffffUL;
1843 }
1844
1845 void exec_context_done(ExecContext *c) {
1846         unsigned l;
1847
1848         assert(c);
1849
1850         strv_free(c->environment);
1851         c->environment = NULL;
1852
1853         strv_free(c->environment_files);
1854         c->environment_files = NULL;
1855
1856         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1857                 free(c->rlimit[l]);
1858                 c->rlimit[l] = NULL;
1859         }
1860
1861         free(c->working_directory);
1862         c->working_directory = NULL;
1863         free(c->root_directory);
1864         c->root_directory = NULL;
1865
1866         free(c->tty_path);
1867         c->tty_path = NULL;
1868
1869         free(c->tcpwrap_name);
1870         c->tcpwrap_name = NULL;
1871
1872         free(c->syslog_identifier);
1873         c->syslog_identifier = NULL;
1874
1875         free(c->user);
1876         c->user = NULL;
1877
1878         free(c->group);
1879         c->group = NULL;
1880
1881         strv_free(c->supplementary_groups);
1882         c->supplementary_groups = NULL;
1883
1884         free(c->pam_name);
1885         c->pam_name = NULL;
1886
1887         if (c->capabilities) {
1888                 cap_free(c->capabilities);
1889                 c->capabilities = NULL;
1890         }
1891
1892         strv_free(c->read_only_dirs);
1893         c->read_only_dirs = NULL;
1894
1895         strv_free(c->read_write_dirs);
1896         c->read_write_dirs = NULL;
1897
1898         strv_free(c->inaccessible_dirs);
1899         c->inaccessible_dirs = NULL;
1900
1901         if (c->cpuset)
1902                 CPU_FREE(c->cpuset);
1903
1904         free(c->utmp_id);
1905         c->utmp_id = NULL;
1906
1907         free(c->selinux_context);
1908         c->selinux_context = NULL;
1909
1910         free(c->apparmor_profile);
1911         c->apparmor_profile = NULL;
1912
1913         set_free(c->syscall_filter);
1914         c->syscall_filter = NULL;
1915
1916         set_free(c->syscall_archs);
1917         c->syscall_archs = NULL;
1918
1919         set_free(c->address_families);
1920         c->address_families = NULL;
1921 }
1922
1923 void exec_command_done(ExecCommand *c) {
1924         assert(c);
1925
1926         free(c->path);
1927         c->path = NULL;
1928
1929         strv_free(c->argv);
1930         c->argv = NULL;
1931 }
1932
1933 void exec_command_done_array(ExecCommand *c, unsigned n) {
1934         unsigned i;
1935
1936         for (i = 0; i < n; i++)
1937                 exec_command_done(c+i);
1938 }
1939
1940 void exec_command_free_list(ExecCommand *c) {
1941         ExecCommand *i;
1942
1943         while ((i = c)) {
1944                 LIST_REMOVE(command, c, i);
1945                 exec_command_done(i);
1946                 free(i);
1947         }
1948 }
1949
1950 void exec_command_free_array(ExecCommand **c, unsigned n) {
1951         unsigned i;
1952
1953         for (i = 0; i < n; i++) {
1954                 exec_command_free_list(c[i]);
1955                 c[i] = NULL;
1956         }
1957 }
1958
1959 int exec_context_load_environment(const ExecContext *c, char ***l) {
1960         char **i, **r = NULL;
1961
1962         assert(c);
1963         assert(l);
1964
1965         STRV_FOREACH(i, c->environment_files) {
1966                 char *fn;
1967                 int k;
1968                 bool ignore = false;
1969                 char **p;
1970                 _cleanup_globfree_ glob_t pglob = {};
1971                 int count, n;
1972
1973                 fn = *i;
1974
1975                 if (fn[0] == '-') {
1976                         ignore = true;
1977                         fn ++;
1978                 }
1979
1980                 if (!path_is_absolute(fn)) {
1981                         if (ignore)
1982                                 continue;
1983
1984                         strv_free(r);
1985                         return -EINVAL;
1986                 }
1987
1988                 /* Filename supports globbing, take all matching files */
1989                 errno = 0;
1990                 if (glob(fn, 0, NULL, &pglob) != 0) {
1991                         if (ignore)
1992                                 continue;
1993
1994                         strv_free(r);
1995                         return errno ? -errno : -EINVAL;
1996                 }
1997                 count = pglob.gl_pathc;
1998                 if (count == 0) {
1999                         if (ignore)
2000                                 continue;
2001
2002                         strv_free(r);
2003                         return -EINVAL;
2004                 }
2005                 for (n = 0; n < count; n++) {
2006                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2007                         if (k < 0) {
2008                                 if (ignore)
2009                                         continue;
2010
2011                                 strv_free(r);
2012                                 return k;
2013                         }
2014                         /* Log invalid environment variables with filename */
2015                         if (p)
2016                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2017
2018                         if (r == NULL)
2019                                 r = p;
2020                         else {
2021                                 char **m;
2022
2023                                 m = strv_env_merge(2, r, p);
2024                                 strv_free(r);
2025                                 strv_free(p);
2026                                 if (!m)
2027                                         return -ENOMEM;
2028
2029                                 r = m;
2030                         }
2031                 }
2032         }
2033
2034         *l = r;
2035
2036         return 0;
2037 }
2038
/* Reports whether the given TTY (with or without a "/dev/" prefix)
 * might be the same device as /dev/console. Errs on the side of
 * "true" when the console cannot be resolved. */
static bool tty_may_match_dev_console(const char *tty) {
        const char *name;
        char *active = NULL, *console;
        bool match;

        /* Compare on the bare device name */
        name = startswith(tty, "/dev/") ? tty + 5 : tty;

        /* Trivial identity */
        if (streq(name, "console"))
                return true;

        console = resolve_dev_console(&active);
        if (!console)
                /* Resolution failed, so a match cannot be ruled out */
                return true;

        if (streq(console, name))
                match = true;
        else
                /* "tty0" is the currently active VC, which may well
                 * be the VC we were asked about */
                match = streq(console, "tty0") && tty_is_vc(name);

        free(active);

        return match;
}
2061
2062 bool exec_context_may_touch_console(ExecContext *ec) {
2063         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2064                 is_terminal_input(ec->std_input) ||
2065                 is_terminal_output(ec->std_output) ||
2066                 is_terminal_output(ec->std_error)) &&
2067                tty_may_match_dev_console(tty_path(ec));
2068 }
2069
/* Writes every string of the NULL-terminated vector l to f, each one
 * preceded by a single space. A NULL vector prints nothing. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        if (!l)
                return;

        for (; *l; l++)
                fprintf(f, " %s", *l);
}
2078
2079 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2080         char **e;
2081         unsigned i;
2082
2083         assert(c);
2084         assert(f);
2085
2086         prefix = strempty(prefix);
2087
2088         fprintf(f,
2089                 "%sUMask: %04o\n"
2090                 "%sWorkingDirectory: %s\n"
2091                 "%sRootDirectory: %s\n"
2092                 "%sNonBlocking: %s\n"
2093                 "%sPrivateTmp: %s\n"
2094                 "%sPrivateNetwork: %s\n"
2095                 "%sPrivateDevices: %s\n"
2096                 "%sIgnoreSIGPIPE: %s\n",
2097                 prefix, c->umask,
2098                 prefix, c->working_directory ? c->working_directory : "/",
2099                 prefix, c->root_directory ? c->root_directory : "/",
2100                 prefix, yes_no(c->non_blocking),
2101                 prefix, yes_no(c->private_tmp),
2102                 prefix, yes_no(c->private_network),
2103                 prefix, yes_no(c->private_devices),
2104                 prefix, yes_no(c->ignore_sigpipe));
2105
2106         STRV_FOREACH(e, c->environment)
2107                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2108
2109         STRV_FOREACH(e, c->environment_files)
2110                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2111
2112         if (c->tcpwrap_name)
2113                 fprintf(f,
2114                         "%sTCPWrapName: %s\n",
2115                         prefix, c->tcpwrap_name);
2116
2117         if (c->nice_set)
2118                 fprintf(f,
2119                         "%sNice: %i\n",
2120                         prefix, c->nice);
2121
2122         if (c->oom_score_adjust_set)
2123                 fprintf(f,
2124                         "%sOOMScoreAdjust: %i\n",
2125                         prefix, c->oom_score_adjust);
2126
2127         for (i = 0; i < RLIM_NLIMITS; i++)
2128                 if (c->rlimit[i])
2129                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
2130
2131         if (c->ioprio_set) {
2132                 _cleanup_free_ char *class_str = NULL;
2133
2134                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2135                 fprintf(f,
2136                         "%sIOSchedulingClass: %s\n"
2137                         "%sIOPriority: %i\n",
2138                         prefix, strna(class_str),
2139                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2140         }
2141
2142         if (c->cpu_sched_set) {
2143                 _cleanup_free_ char *policy_str = NULL;
2144
2145                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2146                 fprintf(f,
2147                         "%sCPUSchedulingPolicy: %s\n"
2148                         "%sCPUSchedulingPriority: %i\n"
2149                         "%sCPUSchedulingResetOnFork: %s\n",
2150                         prefix, strna(policy_str),
2151                         prefix, c->cpu_sched_priority,
2152                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2153         }
2154
2155         if (c->cpuset) {
2156                 fprintf(f, "%sCPUAffinity:", prefix);
2157                 for (i = 0; i < c->cpuset_ncpus; i++)
2158                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2159                                 fprintf(f, " %u", i);
2160                 fputs("\n", f);
2161         }
2162
2163         if (c->timer_slack_nsec != (nsec_t) -1)
2164                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2165
2166         fprintf(f,
2167                 "%sStandardInput: %s\n"
2168                 "%sStandardOutput: %s\n"
2169                 "%sStandardError: %s\n",
2170                 prefix, exec_input_to_string(c->std_input),
2171                 prefix, exec_output_to_string(c->std_output),
2172                 prefix, exec_output_to_string(c->std_error));
2173
2174         if (c->tty_path)
2175                 fprintf(f,
2176                         "%sTTYPath: %s\n"
2177                         "%sTTYReset: %s\n"
2178                         "%sTTYVHangup: %s\n"
2179                         "%sTTYVTDisallocate: %s\n",
2180                         prefix, c->tty_path,
2181                         prefix, yes_no(c->tty_reset),
2182                         prefix, yes_no(c->tty_vhangup),
2183                         prefix, yes_no(c->tty_vt_disallocate));
2184
2185         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2186             c->std_output == EXEC_OUTPUT_KMSG ||
2187             c->std_output == EXEC_OUTPUT_JOURNAL ||
2188             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2189             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2190             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2191             c->std_error == EXEC_OUTPUT_SYSLOG ||
2192             c->std_error == EXEC_OUTPUT_KMSG ||
2193             c->std_error == EXEC_OUTPUT_JOURNAL ||
2194             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2195             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2196             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2197
2198                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2199
2200                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2201                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2202
2203                 fprintf(f,
2204                         "%sSyslogFacility: %s\n"
2205                         "%sSyslogLevel: %s\n",
2206                         prefix, strna(fac_str),
2207                         prefix, strna(lvl_str));
2208         }
2209
2210         if (c->capabilities) {
2211                 _cleanup_cap_free_charp_ char *t;
2212
2213                 t = cap_to_text(c->capabilities, NULL);
2214                 if (t)
2215                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2216         }
2217
2218         if (c->secure_bits)
2219                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2220                         prefix,
2221                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2222                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2223                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2224                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2225                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2226                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2227
2228         if (c->capability_bounding_set_drop) {
2229                 unsigned long l;
2230                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2231
2232                 for (l = 0; l <= cap_last_cap(); l++)
2233                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2234                                 _cleanup_cap_free_charp_ char *t;
2235
2236                                 t = cap_to_name(l);
2237                                 if (t)
2238                                         fprintf(f, " %s", t);
2239                         }
2240
2241                 fputs("\n", f);
2242         }
2243
2244         if (c->user)
2245                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2246         if (c->group)
2247                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2248
2249         if (strv_length(c->supplementary_groups) > 0) {
2250                 fprintf(f, "%sSupplementaryGroups:", prefix);
2251                 strv_fprintf(f, c->supplementary_groups);
2252                 fputs("\n", f);
2253         }
2254
2255         if (c->pam_name)
2256                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2257
2258         if (strv_length(c->read_write_dirs) > 0) {
2259                 fprintf(f, "%sReadWriteDirs:", prefix);
2260                 strv_fprintf(f, c->read_write_dirs);
2261                 fputs("\n", f);
2262         }
2263
2264         if (strv_length(c->read_only_dirs) > 0) {
2265                 fprintf(f, "%sReadOnlyDirs:", prefix);
2266                 strv_fprintf(f, c->read_only_dirs);
2267                 fputs("\n", f);
2268         }
2269
2270         if (strv_length(c->inaccessible_dirs) > 0) {
2271                 fprintf(f, "%sInaccessibleDirs:", prefix);
2272                 strv_fprintf(f, c->inaccessible_dirs);
2273                 fputs("\n", f);
2274         }
2275
2276         if (c->utmp_id)
2277                 fprintf(f,
2278                         "%sUtmpIdentifier: %s\n",
2279                         prefix, c->utmp_id);
2280
2281         if (c->selinux_context)
2282                 fprintf(f,
2283                         "%sSELinuxContext: %s%s\n",
2284                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2285
2286         if (c->personality != 0xffffffffUL)
2287                 fprintf(f,
2288                         "%sPersonality: %s\n",
2289                         prefix, strna(personality_to_string(c->personality)));
2290
2291         if (c->syscall_filter) {
2292 #ifdef HAVE_SECCOMP
2293                 Iterator j;
2294                 void *id;
2295                 bool first = true;
2296 #endif
2297
2298                 fprintf(f,
2299                         "%sSystemCallFilter: ",
2300                         prefix);
2301
2302                 if (!c->syscall_whitelist)
2303                         fputc('~', f);
2304
2305 #ifdef HAVE_SECCOMP
2306                 SET_FOREACH(id, c->syscall_filter, j) {
2307                         _cleanup_free_ char *name = NULL;
2308
2309                         if (first)
2310                                 first = false;
2311                         else
2312                                 fputc(' ', f);
2313
2314                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2315                         fputs(strna(name), f);
2316                 }
2317 #endif
2318
2319                 fputc('\n', f);
2320         }
2321
2322         if (c->syscall_archs) {
2323 #ifdef HAVE_SECCOMP
2324                 Iterator j;
2325                 void *id;
2326 #endif
2327
2328                 fprintf(f,
2329                         "%sSystemCallArchitectures:",
2330                         prefix);
2331
2332 #ifdef HAVE_SECCOMP
2333                 SET_FOREACH(id, c->syscall_archs, j)
2334                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2335 #endif
2336                 fputc('\n', f);
2337         }
2338
2339         if (c->syscall_errno != 0)
2340                 fprintf(f,
2341                         "%sSystemCallErrorNumber: %s\n",
2342                         prefix, strna(errno_to_name(c->syscall_errno)));
2343
2344         if (c->apparmor_profile)
2345                 fprintf(f,
2346                         "%sAppArmorProfile: %s%s\n",
2347                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2348 }
2349
2350 void exec_status_start(ExecStatus *s, pid_t pid) {
2351         assert(s);
2352
2353         zero(*s);
2354         s->pid = pid;
2355         dual_timestamp_get(&s->start_timestamp);
2356 }
2357
2358 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2359         assert(s);
2360
2361         if (s->pid && s->pid != pid)
2362                 zero(*s);
2363
2364         s->pid = pid;
2365         dual_timestamp_get(&s->exit_timestamp);
2366
2367         s->code = code;
2368         s->status = status;
2369
2370         if (context) {
2371                 if (context->utmp_id)
2372                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2373
2374                 exec_context_tty_reset(context);
2375         }
2376 }
2377
2378 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2379         char buf[FORMAT_TIMESTAMP_MAX];
2380
2381         assert(s);
2382         assert(f);
2383
2384         if (!prefix)
2385                 prefix = "";
2386
2387         if (s->pid <= 0)
2388                 return;
2389
2390         fprintf(f,
2391                 "%sPID: "PID_FMT"\n",
2392                 prefix, s->pid);
2393
2394         if (s->start_timestamp.realtime > 0)
2395                 fprintf(f,
2396                         "%sStart Timestamp: %s\n",
2397                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2398
2399         if (s->exit_timestamp.realtime > 0)
2400                 fprintf(f,
2401                         "%sExit Timestamp: %s\n"
2402                         "%sExit Code: %s\n"
2403                         "%sExit Status: %i\n",
2404                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2405                         prefix, sigchld_code_to_string(s->code),
2406                         prefix, s->status);
2407 }
2408
2409 char *exec_command_line(char **argv) {
2410         size_t k;
2411         char *n, *p, **a;
2412         bool first = true;
2413
2414         assert(argv);
2415
2416         k = 1;
2417         STRV_FOREACH(a, argv)
2418                 k += strlen(*a)+3;
2419
2420         if (!(n = new(char, k)))
2421                 return NULL;
2422
2423         p = n;
2424         STRV_FOREACH(a, argv) {
2425
2426                 if (!first)
2427                         *(p++) = ' ';
2428                 else
2429                         first = false;
2430
2431                 if (strpbrk(*a, WHITESPACE)) {
2432                         *(p++) = '\'';
2433                         p = stpcpy(p, *a);
2434                         *(p++) = '\'';
2435                 } else
2436                         p = stpcpy(p, *a);
2437
2438         }
2439
2440         *p = 0;
2441
2442         /* FIXME: this doesn't really handle arguments that have
2443          * spaces and ticks in them */
2444
2445         return n;
2446 }
2447
2448 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2449         char *p2;
2450         const char *prefix2;
2451
2452         char *cmd;
2453
2454         assert(c);
2455         assert(f);
2456
2457         if (!prefix)
2458                 prefix = "";
2459         p2 = strappend(prefix, "\t");
2460         prefix2 = p2 ? p2 : prefix;
2461
2462         cmd = exec_command_line(c->argv);
2463
2464         fprintf(f,
2465                 "%sCommand Line: %s\n",
2466                 prefix, cmd ? cmd : strerror(ENOMEM));
2467
2468         free(cmd);
2469
2470         exec_status_dump(&c->exec_status, f, prefix2);
2471
2472         free(p2);
2473 }
2474
2475 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2476         assert(f);
2477
2478         if (!prefix)
2479                 prefix = "";
2480
2481         LIST_FOREACH(command, c, c)
2482                 exec_command_dump(c, f, prefix);
2483 }
2484
2485 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2486         ExecCommand *end;
2487
2488         assert(l);
2489         assert(e);
2490
2491         if (*l) {
2492                 /* It's kind of important, that we keep the order here */
2493                 LIST_FIND_TAIL(command, *l, end);
2494                 LIST_INSERT_AFTER(command, *l, end, e);
2495         } else
2496               *l = e;
2497 }
2498
2499 int exec_command_set(ExecCommand *c, const char *path, ...) {
2500         va_list ap;
2501         char **l, *p;
2502
2503         assert(c);
2504         assert(path);
2505
2506         va_start(ap, path);
2507         l = strv_new_ap(path, ap);
2508         va_end(ap);
2509
2510         if (!l)
2511                 return -ENOMEM;
2512
2513         p = strdup(path);
2514         if (!p) {
2515                 strv_free(l);
2516                 return -ENOMEM;
2517         }
2518
2519         free(c->path);
2520         c->path = p;
2521
2522         strv_free(c->argv);
2523         c->argv = l;
2524
2525         return 0;
2526 }
2527
2528 static int exec_runtime_allocate(ExecRuntime **rt) {
2529
2530         if (*rt)
2531                 return 0;
2532
2533         *rt = new0(ExecRuntime, 1);
2534         if (!*rt)
2535                 return -ENOMEM;
2536
2537         (*rt)->n_ref = 1;
2538         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2539
2540         return 0;
2541 }
2542
2543 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2544         int r;
2545
2546         assert(rt);
2547         assert(c);
2548         assert(id);
2549
2550         if (*rt)
2551                 return 1;
2552
2553         if (!c->private_network && !c->private_tmp)
2554                 return 0;
2555
2556         r = exec_runtime_allocate(rt);
2557         if (r < 0)
2558                 return r;
2559
2560         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2561                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2562                         return -errno;
2563         }
2564
2565         if (c->private_tmp && !(*rt)->tmp_dir) {
2566                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2567                 if (r < 0)
2568                         return r;
2569         }
2570
2571         return 1;
2572 }
2573
2574 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2575         assert(r);
2576         assert(r->n_ref > 0);
2577
2578         r->n_ref++;
2579         return r;
2580 }
2581
2582 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2583
2584         if (!r)
2585                 return NULL;
2586
2587         assert(r->n_ref > 0);
2588
2589         r->n_ref--;
2590         if (r->n_ref <= 0) {
2591                 free(r->tmp_dir);
2592                 free(r->var_tmp_dir);
2593                 close_pipe(r->netns_storage_socket);
2594                 free(r);
2595         }
2596
2597         return NULL;
2598 }
2599
2600 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2601         assert(u);
2602         assert(f);
2603         assert(fds);
2604
2605         if (!rt)
2606                 return 0;
2607
2608         if (rt->tmp_dir)
2609                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2610
2611         if (rt->var_tmp_dir)
2612                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2613
2614         if (rt->netns_storage_socket[0] >= 0) {
2615                 int copy;
2616
2617                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2618                 if (copy < 0)
2619                         return copy;
2620
2621                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2622         }
2623
2624         if (rt->netns_storage_socket[1] >= 0) {
2625                 int copy;
2626
2627                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2628                 if (copy < 0)
2629                         return copy;
2630
2631                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2632         }
2633
2634         return 0;
2635 }
2636
2637 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2638         int r;
2639
2640         assert(rt);
2641         assert(key);
2642         assert(value);
2643
2644         if (streq(key, "tmp-dir")) {
2645                 char *copy;
2646
2647                 r = exec_runtime_allocate(rt);
2648                 if (r < 0)
2649                         return r;
2650
2651                 copy = strdup(value);
2652                 if (!copy)
2653                         return log_oom();
2654
2655                 free((*rt)->tmp_dir);
2656                 (*rt)->tmp_dir = copy;
2657
2658         } else if (streq(key, "var-tmp-dir")) {
2659                 char *copy;
2660
2661                 r = exec_runtime_allocate(rt);
2662                 if (r < 0)
2663                         return r;
2664
2665                 copy = strdup(value);
2666                 if (!copy)
2667                         return log_oom();
2668
2669                 free((*rt)->var_tmp_dir);
2670                 (*rt)->var_tmp_dir = copy;
2671
2672         } else if (streq(key, "netns-socket-0")) {
2673                 int fd;
2674
2675                 r = exec_runtime_allocate(rt);
2676                 if (r < 0)
2677                         return r;
2678
2679                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2680                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2681                 else {
2682                         if ((*rt)->netns_storage_socket[0] >= 0)
2683                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2684
2685                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2686                 }
2687         } else if (streq(key, "netns-socket-1")) {
2688                 int fd;
2689
2690                 r = exec_runtime_allocate(rt);
2691                 if (r < 0)
2692                         return r;
2693
2694                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2695                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2696                 else {
2697                         if ((*rt)->netns_storage_socket[1] >= 0)
2698                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2699
2700                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2701                 }
2702         } else
2703                 return 0;
2704
2705         return 1;
2706 }
2707
/* Thread entry point used to get rid of a temporary directory in the
 * background.
 *
 * p is a heap-allocated path string. This function takes ownership of it:
 * the path is handed to rm_rf_dangerous() and freed afterwards. The return
 * value is always NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2714
2715 void exec_runtime_destroy(ExecRuntime *rt) {
2716         if (!rt)
2717                 return;
2718
2719         /* If there are multiple users of this, let's leave the stuff around */
2720         if (rt->n_ref > 1)
2721                 return;
2722
2723         if (rt->tmp_dir) {
2724                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2725                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2726                 rt->tmp_dir = NULL;
2727         }
2728
2729         if (rt->var_tmp_dir) {
2730                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2731                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2732                 rt->var_tmp_dir = NULL;
2733         }
2734
2735         close_pipe(rt->netns_storage_socket);
2736 }
2737
2738 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2739         [EXEC_INPUT_NULL] = "null",
2740         [EXEC_INPUT_TTY] = "tty",
2741         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2742         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2743         [EXEC_INPUT_SOCKET] = "socket"
2744 };
2745
2746 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2747
2748 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2749         [EXEC_OUTPUT_INHERIT] = "inherit",
2750         [EXEC_OUTPUT_NULL] = "null",
2751         [EXEC_OUTPUT_TTY] = "tty",
2752         [EXEC_OUTPUT_SYSLOG] = "syslog",
2753         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2754         [EXEC_OUTPUT_KMSG] = "kmsg",
2755         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2756         [EXEC_OUTPUT_JOURNAL] = "journal",
2757         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2758         [EXEC_OUTPUT_SOCKET] = "socket"
2759 };
2760
2761 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);