chiark / gitweb /
a806d42827a8afa5389bc52948e6498b7cfee884
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
/* Idle timeouts, expressed as multiples of USEC_PER_SEC.
 * NOTE(review): their consumers are outside this part of the file. */
#define IDLE_TIMEOUT_USEC  (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode chown_terminal() puts on the terminal.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the journal stream socket (8 MiB) */
#define SNDBUF_SIZE (8 * 1024 * 1024)
102
/* Relocates the passed descriptors so that fds[i] ends up being
 * descriptor number i+3. The array is updated in place with the new
 * descriptor numbers. Returns 0 on success and -errno if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! (sorts it) */

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, dup_fd;

                        /* Nothing to do if this one sits in its slot already */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still taken, so we got
                         * a higher number. Remember the first index
                         * where that happened and retry from there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
146
/* Applies the requested O_NONBLOCK state to every passed descriptor and
 * clears FD_CLOEXEC on each of them. Returns 0 on success, or the
 * first negative value returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (p = fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be passed on to our children */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_path)
178                 return context->tty_path;
179
180         return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184         assert(context);
185
186         if (context->tty_vhangup)
187                 terminal_vhangup(tty_path(context));
188
189         if (context->tty_reset)
190                 reset_terminal(tty_path(context));
191
192         if (context->tty_vt_disallocate && context->tty_path)
193                 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197         return
198                 o == EXEC_OUTPUT_TTY ||
199                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, -errno on
 * failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We happened to get the right number right away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(null_fd);
        return r;
}
221
222 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
223         int fd, r;
224         union sockaddr_union sa = {
225                 .un.sun_family = AF_UNIX,
226                 .un.sun_path = "/run/systemd/journal/stdout",
227         };
228
229         assert(context);
230         assert(output < _EXEC_OUTPUT_MAX);
231         assert(ident);
232         assert(nfd >= 0);
233
234         fd = socket(AF_UNIX, SOCK_STREAM, 0);
235         if (fd < 0)
236                 return -errno;
237
238         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
239         if (r < 0) {
240                 safe_close(fd);
241                 return -errno;
242         }
243
244         if (shutdown(fd, SHUT_RD) < 0) {
245                 safe_close(fd);
246                 return -errno;
247         }
248
249         fd_inc_sndbuf(fd, SNDBUF_SIZE);
250
251         dprintf(fd,
252                 "%s\n"
253                 "%s\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n"
257                 "%i\n"
258                 "%i\n",
259                 context->syslog_identifier ? context->syslog_identifier : ident,
260                 unit_id,
261                 context->syslog_priority,
262                 !!context->syslog_level_prefix,
263                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
264                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
265                 is_terminal_output(output));
266
267         if (fd != nfd) {
268                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
269                 safe_close(fd);
270         } else
271                 r = nfd;
272
273         return r;
274 }
/* Opens the terminal at path via open_terminal() (O_NOCTTY is always
 * added to mode) and makes it available as descriptor nfd. Returns nfd
 * on success, otherwise the negative error from open_terminal() or
 * -errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(tty_fd);
        return r;
}
292
293 static bool is_terminal_input(ExecInput i) {
294         return
295                 i == EXEC_INPUT_TTY ||
296                 i == EXEC_INPUT_TTY_FORCE ||
297                 i == EXEC_INPUT_TTY_FAIL;
298 }
299
300 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
301
302         if (is_terminal_input(std_input) && !apply_tty_stdin)
303                 return EXEC_INPUT_NULL;
304
305         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
306                 return EXEC_INPUT_NULL;
307
308         return std_input;
309 }
310
311 static int fixup_output(ExecOutput std_output, int socket_fd) {
312
313         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
314                 return EXEC_OUTPUT_INHERIT;
315
316         return std_output;
317 }
318
319 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320         ExecInput i;
321
322         assert(context);
323
324         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
325
326         switch (i) {
327
328         case EXEC_INPUT_NULL:
329                 return open_null_as(O_RDONLY, STDIN_FILENO);
330
331         case EXEC_INPUT_TTY:
332         case EXEC_INPUT_TTY_FORCE:
333         case EXEC_INPUT_TTY_FAIL: {
334                 int fd, r;
335
336                 fd = acquire_terminal(tty_path(context),
337                                       i == EXEC_INPUT_TTY_FAIL,
338                                       i == EXEC_INPUT_TTY_FORCE,
339                                       false,
340                                       USEC_INFINITY);
341                 if (fd < 0)
342                         return fd;
343
344                 if (fd != STDIN_FILENO) {
345                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
346                         safe_close(fd);
347                 } else
348                         r = STDIN_FILENO;
349
350                 return r;
351         }
352
353         case EXEC_INPUT_SOCKET:
354                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
355
356         default:
357                 assert_not_reached("Unknown input type");
358         }
359 }
360
361 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
362         ExecOutput o;
363         ExecInput i;
364         int r;
365
366         assert(context);
367         assert(ident);
368
369         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
370         o = fixup_output(context->std_output, socket_fd);
371
372         if (fileno == STDERR_FILENO) {
373                 ExecOutput e;
374                 e = fixup_output(context->std_error, socket_fd);
375
376                 /* This expects the input and output are already set up */
377
378                 /* Don't change the stderr file descriptor if we inherit all
379                  * the way and are not on a tty */
380                 if (e == EXEC_OUTPUT_INHERIT &&
381                     o == EXEC_OUTPUT_INHERIT &&
382                     i == EXEC_INPUT_NULL &&
383                     !is_terminal_input(context->std_input) &&
384                     getppid () != 1)
385                         return fileno;
386
387                 /* Duplicate from stdout if possible */
388                 if (e == o || e == EXEC_OUTPUT_INHERIT)
389                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
390
391                 o = e;
392
393         } else if (o == EXEC_OUTPUT_INHERIT) {
394                 /* If input got downgraded, inherit the original value */
395                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
396                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
397
398                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
399                 if (i != EXEC_INPUT_NULL)
400                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401
402                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
403                 if (getppid() != 1)
404                         return fileno;
405
406                 /* We need to open /dev/null here anew, to get the right access mode. */
407                 return open_null_as(O_WRONLY, fileno);
408         }
409
410         switch (o) {
411
412         case EXEC_OUTPUT_NULL:
413                 return open_null_as(O_WRONLY, fileno);
414
415         case EXEC_OUTPUT_TTY:
416                 if (is_terminal_input(i))
417                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
418
419                 /* We don't reset the terminal if this is just about output */
420                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
421
422         case EXEC_OUTPUT_SYSLOG:
423         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
424         case EXEC_OUTPUT_KMSG:
425         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
426         case EXEC_OUTPUT_JOURNAL:
427         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
428                 r = connect_logger_as(context, o, ident, unit_id, fileno);
429                 if (r < 0) {
430                         log_unit_struct(unit_id,
431                                         LOG_CRIT,
432                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
433                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
434                                                     unit_id, strerror(-r)),
435                                         LOG_ERRNO(-r),
436                                         NULL);
437                         r = open_null_as(O_WRONLY, fileno);
438                 }
439                 return r;
440
441         case EXEC_OUTPUT_SOCKET:
442                 assert(socket_fd >= 0);
443                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
444
445         default:
446                 assert_not_reached("Unknown error type");
447         }
448 }
449
450 static int chown_terminal(int fd, uid_t uid) {
451         struct stat st;
452
453         assert(fd >= 0);
454
455         /* This might fail. What matters are the results. */
456         (void) fchown(fd, uid, -1);
457         (void) fchmod(fd, TTY_MODE);
458
459         if (fstat(fd, &st) < 0)
460                 return -errno;
461
462         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
463                 return -EPERM;
464
465         return 0;
466 }
467
468 static int setup_confirm_stdio(int *_saved_stdin,
469                                int *_saved_stdout) {
470         int fd = -1, saved_stdin, saved_stdout = -1, r;
471
472         assert(_saved_stdin);
473         assert(_saved_stdout);
474
475         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
476         if (saved_stdin < 0)
477                 return -errno;
478
479         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
480         if (saved_stdout < 0) {
481                 r = errno;
482                 goto fail;
483         }
484
485         fd = acquire_terminal(
486                         "/dev/console",
487                         false,
488                         false,
489                         false,
490                         DEFAULT_CONFIRM_USEC);
491         if (fd < 0) {
492                 r = fd;
493                 goto fail;
494         }
495
496         r = chown_terminal(fd, getuid());
497         if (r < 0)
498                 goto fail;
499
500         if (dup2(fd, STDIN_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (dup2(fd, STDOUT_FILENO) < 0) {
506                 r = -errno;
507                 goto fail;
508         }
509
510         if (fd >= 2)
511                 safe_close(fd);
512
513         *_saved_stdin = saved_stdin;
514         *_saved_stdout = saved_stdout;
515
516         return 0;
517
518 fail:
519         safe_close(saved_stdout);
520         safe_close(saved_stdin);
521         safe_close(fd);
522
523         return r;
524 }
525
526 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
527         _cleanup_close_ int fd = -1;
528         va_list ap;
529
530         assert(format);
531
532         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
533         if (fd < 0)
534                 return fd;
535
536         va_start(ap, format);
537         vdprintf(fd, format, ap);
538         va_end(ap);
539
540         return 0;
541 }
542
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back in place as stdin/stdout and closes the
 * saved copies. Negative saved descriptors are skipped. Returns 0 on
 * success, or -errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
566
567 static int ask_for_confirmation(char *response, char **argv) {
568         int saved_stdout = -1, saved_stdin = -1, r;
569         _cleanup_free_ char *line = NULL;
570
571         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
572         if (r < 0)
573                 return r;
574
575         line = exec_command_line(argv);
576         if (!line)
577                 return -ENOMEM;
578
579         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
580
581         restore_confirm_stdio(&saved_stdin, &saved_stdout);
582
583         return r;
584 }
585
586 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
587         bool keep_groups = false;
588         int r;
589
590         assert(context);
591
592         /* Lookup and set GID and supplementary group list. Here too
593          * we avoid NSS lookups for gid=0. */
594
595         if (context->group || username) {
596
597                 if (context->group) {
598                         const char *g = context->group;
599
600                         if ((r = get_group_creds(&g, &gid)) < 0)
601                                 return r;
602                 }
603
604                 /* First step, initialize groups from /etc/groups */
605                 if (username && gid != 0) {
606                         if (initgroups(username, gid) < 0)
607                                 return -errno;
608
609                         keep_groups = true;
610                 }
611
612                 /* Second step, set our gids */
613                 if (setresgid(gid, gid, gid) < 0)
614                         return -errno;
615         }
616
617         if (context->supplementary_groups) {
618                 int ngroups_max, k;
619                 gid_t *gids;
620                 char **i;
621
622                 /* Final step, initialize any manually set supplementary groups */
623                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
624
625                 if (!(gids = new(gid_t, ngroups_max)))
626                         return -ENOMEM;
627
628                 if (keep_groups) {
629                         if ((k = getgroups(ngroups_max, gids)) < 0) {
630                                 free(gids);
631                                 return -errno;
632                         }
633                 } else
634                         k = 0;
635
636                 STRV_FOREACH(i, context->supplementary_groups) {
637                         const char *g;
638
639                         if (k >= ngroups_max) {
640                                 free(gids);
641                                 return -E2BIG;
642                         }
643
644                         g = *i;
645                         r = get_group_creds(&g, gids+k);
646                         if (r < 0) {
647                                 free(gids);
648                                 return r;
649                         }
650
651                         k++;
652                 }
653
654                 if (setgroups(k, gids) < 0) {
655                         free(gids);
656                         return -errno;
657                 }
658
659                 free(gids);
660         }
661
662         return 0;
663 }
664
665 static int enforce_user(const ExecContext *context, uid_t uid) {
666         assert(context);
667
668         /* Sets (but doesn't lookup) the uid and make sure we keep the
669          * capabilities while doing so. */
670
671         if (context->capabilities) {
672                 _cleanup_cap_free_ cap_t d = NULL;
673                 static const cap_value_t bits[] = {
674                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
675                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
676                 };
677
678                 /* First step: If we need to keep capabilities but
679                  * drop privileges we need to make sure we keep our
680                  * caps, while we drop privileges. */
681                 if (uid != 0) {
682                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
683
684                         if (prctl(PR_GET_SECUREBITS) != sb)
685                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
686                                         return -errno;
687                 }
688
689                 /* Second step: set the capabilities. This will reduce
690                  * the capabilities to the minimum we need. */
691
692                 d = cap_dup(context->capabilities);
693                 if (!d)
694                         return -errno;
695
696                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
697                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
698                         return -errno;
699
700                 if (cap_set_proc(d) < 0)
701                         return -errno;
702         }
703
704         /* Third step: actually set the uids */
705         if (setresuid(uid, uid, uid) < 0)
706                 return -errno;
707
708         /* At this point we should have all necessary capabilities but
709            are otherwise a normal user. However, the caps might got
710            corrupted due to the setresuid() so we need clean them up
711            later. This is done outside of this call. */
712
713         return 0;
714 }
715
716 #ifdef HAVE_PAM
717
718 static int null_conv(
719                 int num_msg,
720                 const struct pam_message **msg,
721                 struct pam_response **resp,
722                 void *appdata_ptr) {
723
724         /* We don't support conversations */
725
726         return PAM_CONV_ERR;
727 }
728
729 static int setup_pam(
730                 const char *name,
731                 const char *user,
732                 uid_t uid,
733                 const char *tty,
734                 char ***pam_env,
735                 int fds[], unsigned n_fds) {
736
737         static const struct pam_conv conv = {
738                 .conv = null_conv,
739                 .appdata_ptr = NULL
740         };
741
742         pam_handle_t *handle = NULL;
743         sigset_t ss, old_ss;
744         int pam_code = PAM_SUCCESS;
745         int err;
746         char **e = NULL;
747         bool close_session = false;
748         pid_t pam_pid = 0, parent_pid;
749         int flags = 0;
750
751         assert(name);
752         assert(user);
753         assert(pam_env);
754
755         /* We set up PAM in the parent process, then fork. The child
756          * will then stay around until killed via PR_GET_PDEATHSIG or
757          * systemd via the cgroup logic. It will then remove the PAM
758          * session again. The parent process will exec() the actual
759          * daemon. We do things this way to ensure that the main PID
760          * of the daemon is the one we initially fork()ed. */
761
762         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
763                 flags |= PAM_SILENT;
764
765         pam_code = pam_start(name, user, &conv, &handle);
766         if (pam_code != PAM_SUCCESS) {
767                 handle = NULL;
768                 goto fail;
769         }
770
771         if (tty) {
772                 pam_code = pam_set_item(handle, PAM_TTY, tty);
773                 if (pam_code != PAM_SUCCESS)
774                         goto fail;
775         }
776
777         pam_code = pam_acct_mgmt(handle, flags);
778         if (pam_code != PAM_SUCCESS)
779                 goto fail;
780
781         pam_code = pam_open_session(handle, flags);
782         if (pam_code != PAM_SUCCESS)
783                 goto fail;
784
785         close_session = true;
786
787         e = pam_getenvlist(handle);
788         if (!e) {
789                 pam_code = PAM_BUF_ERR;
790                 goto fail;
791         }
792
793         /* Block SIGTERM, so that we know that it won't get lost in
794          * the child */
795         if (sigemptyset(&ss) < 0 ||
796             sigaddset(&ss, SIGTERM) < 0 ||
797             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
798                 goto fail;
799
800         parent_pid = getpid();
801
802         pam_pid = fork();
803         if (pam_pid < 0)
804                 goto fail;
805
806         if (pam_pid == 0) {
807                 int sig;
808                 int r = EXIT_PAM;
809
810                 /* The child's job is to reset the PAM session on
811                  * termination */
812
813                 /* This string must fit in 10 chars (i.e. the length
814                  * of "/sbin/init"), to look pretty in /bin/ps */
815                 rename_process("(sd-pam)");
816
817                 /* Make sure we don't keep open the passed fds in this
818                 child. We assume that otherwise only those fds are
819                 open here that have been opened by PAM. */
820                 close_many(fds, n_fds);
821
822                 /* Drop privileges - we don't need any to pam_close_session
823                  * and this will make PR_SET_PDEATHSIG work in most cases.
824                  * If this fails, ignore the error - but expect sd-pam threads
825                  * to fail to exit normally */
826                 if (setresuid(uid, uid, uid) < 0)
827                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
828
829                 /* Wait until our parent died. This will only work if
830                  * the above setresuid() succeeds, otherwise the kernel
831                  * will not allow unprivileged parents kill their privileged
832                  * children this way. We rely on the control groups kill logic
833                  * to do the rest for us. */
834                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
835                         goto child_finish;
836
837                 /* Check if our parent process might already have
838                  * died? */
839                 if (getppid() == parent_pid) {
840                         for (;;) {
841                                 if (sigwait(&ss, &sig) < 0) {
842                                         if (errno == EINTR)
843                                                 continue;
844
845                                         goto child_finish;
846                                 }
847
848                                 assert(sig == SIGTERM);
849                                 break;
850                         }
851                 }
852
853                 /* If our parent died we'll end the session */
854                 if (getppid() != parent_pid) {
855                         pam_code = pam_close_session(handle, flags);
856                         if (pam_code != PAM_SUCCESS)
857                                 goto child_finish;
858                 }
859
860                 r = 0;
861
862         child_finish:
863                 pam_end(handle, pam_code | flags);
864                 _exit(r);
865         }
866
867         /* If the child was forked off successfully it will do all the
868          * cleanups, so forget about the handle here. */
869         handle = NULL;
870
871         /* Unblock SIGTERM again in the parent */
872         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
873                 goto fail;
874
875         /* We close the log explicitly here, since the PAM modules
876          * might have opened it, but we don't want this fd around. */
877         closelog();
878
879         *pam_env = e;
880         e = NULL;
881
882         return 0;
883
884 fail:
885         if (pam_code != PAM_SUCCESS) {
886                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
887                 err = -EPERM;  /* PAM errors do not map to errno */
888         } else {
889                 log_error_errno(errno, "PAM failed: %m");
890                 err = -errno;
891         }
892
893         if (handle) {
894                 if (close_session)
895                         pam_code = pam_close_session(handle, flags);
896
897                 pam_end(handle, pam_code | flags);
898         }
899
900         strv_free(e);
901
902         closelog();
903
904         if (pam_pid > 1) {
905                 kill(pam_pid, SIGTERM);
906                 kill(pam_pid, SIGCONT);
907         }
908
909         return err;
910 }
911 #endif
912
/* Renames the process to "(<name>)", where <name> is made up of the
 * last (at most) eight characters of the base name of path. If the
 * base name is empty "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t n;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: the end of the name is usually more
                 * interesting, since the first bit might just be
                 * "systemd-" */
                base += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, n);
        process_name[n + 1] = ')';
        process_name[n + 2] = 0;

        rename_process(process_name);
}
943
944 #ifdef HAVE_SECCOMP
945
946 static int apply_seccomp(const ExecContext *c) {
947         uint32_t negative_action, action;
948         scmp_filter_ctx *seccomp;
949         Iterator i;
950         void *id;
951         int r;
952
953         assert(c);
954
955         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
956
957         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
958         if (!seccomp)
959                 return -ENOMEM;
960
961         if (c->syscall_archs) {
962
963                 SET_FOREACH(id, c->syscall_archs, i) {
964                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
965                         if (r == -EEXIST)
966                                 continue;
967                         if (r < 0)
968                                 goto finish;
969                 }
970
971         } else {
972                 r = seccomp_add_secondary_archs(seccomp);
973                 if (r < 0)
974                         goto finish;
975         }
976
977         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
978         SET_FOREACH(id, c->syscall_filter, i) {
979                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
980                 if (r < 0)
981                         goto finish;
982         }
983
984         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
985         if (r < 0)
986                 goto finish;
987
988         r = seccomp_load(seccomp);
989
990 finish:
991         seccomp_release(seccomp);
992         return r;
993 }
994
995 static int apply_address_families(const ExecContext *c) {
996         scmp_filter_ctx *seccomp;
997         Iterator i;
998         int r;
999
1000         assert(c);
1001
1002         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1003         if (!seccomp)
1004                 return -ENOMEM;
1005
1006         r = seccomp_add_secondary_archs(seccomp);
1007         if (r < 0)
1008                 goto finish;
1009
1010         if (c->address_families_whitelist) {
1011                 int af, first = 0, last = 0;
1012                 void *afp;
1013
1014                 /* If this is a whitelist, we first block the address
1015                  * families that are out of range and then everything
1016                  * that is not in the set. First, we find the lowest
1017                  * and highest address family in the set. */
1018
1019                 SET_FOREACH(afp, c->address_families, i) {
1020                         af = PTR_TO_INT(afp);
1021
1022                         if (af <= 0 || af >= af_max())
1023                                 continue;
1024
1025                         if (first == 0 || af < first)
1026                                 first = af;
1027
1028                         if (last == 0 || af > last)
1029                                 last = af;
1030                 }
1031
1032                 assert((first == 0) == (last == 0));
1033
1034                 if (first == 0) {
1035
1036                         /* No entries in the valid range, block everything */
1037                         r = seccomp_rule_add(
1038                                         seccomp,
1039                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1040                                         SCMP_SYS(socket),
1041                                         0);
1042                         if (r < 0)
1043                                 goto finish;
1044
1045                 } else {
1046
1047                         /* Block everything below the first entry */
1048                         r = seccomp_rule_add(
1049                                         seccomp,
1050                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1051                                         SCMP_SYS(socket),
1052                                         1,
1053                                         SCMP_A0(SCMP_CMP_LT, first));
1054                         if (r < 0)
1055                                 goto finish;
1056
1057                         /* Block everything above the last entry */
1058                         r = seccomp_rule_add(
1059                                         seccomp,
1060                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1061                                         SCMP_SYS(socket),
1062                                         1,
1063                                         SCMP_A0(SCMP_CMP_GT, last));
1064                         if (r < 0)
1065                                 goto finish;
1066
1067                         /* Block everything between the first and last
1068                          * entry */
1069                         for (af = 1; af < af_max(); af++) {
1070
1071                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1072                                         continue;
1073
1074                                 r = seccomp_rule_add(
1075                                                 seccomp,
1076                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1077                                                 SCMP_SYS(socket),
1078                                                 1,
1079                                                 SCMP_A0(SCMP_CMP_EQ, af));
1080                                 if (r < 0)
1081                                         goto finish;
1082                         }
1083                 }
1084
1085         } else {
1086                 void *af;
1087
1088                 /* If this is a blacklist, then generate one rule for
1089                  * each address family that are then combined in OR
1090                  * checks. */
1091
1092                 SET_FOREACH(af, c->address_families, i) {
1093
1094                         r = seccomp_rule_add(
1095                                         seccomp,
1096                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097                                         SCMP_SYS(socket),
1098                                         1,
1099                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1100                         if (r < 0)
1101                                 goto finish;
1102                 }
1103         }
1104
1105         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1106         if (r < 0)
1107                 goto finish;
1108
1109         r = seccomp_load(seccomp);
1110
1111 finish:
1112         seccomp_release(seccomp);
1113         return r;
1114 }
1115
1116 #endif
1117
1118 static void do_idle_pipe_dance(int idle_pipe[4]) {
1119         assert(idle_pipe);
1120
1121
1122         safe_close(idle_pipe[1]);
1123         safe_close(idle_pipe[2]);
1124
1125         if (idle_pipe[0] >= 0) {
1126                 int r;
1127
1128                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1129
1130                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1131                         /* Signal systemd that we are bored and want to continue. */
1132                         write(idle_pipe[3], "x", 1);
1133
1134                         /* Wait for systemd to react to the signal above. */
1135                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1136                 }
1137
1138                 safe_close(idle_pipe[0]);
1139
1140         }
1141
1142         safe_close(idle_pipe[3]);
1143 }
1144
1145 static int build_environment(
1146                 const ExecContext *c,
1147                 unsigned n_fds,
1148                 usec_t watchdog_usec,
1149                 const char *home,
1150                 const char *username,
1151                 const char *shell,
1152                 char ***ret) {
1153
1154         _cleanup_strv_free_ char **our_env = NULL;
1155         unsigned n_env = 0;
1156         char *x;
1157
1158         assert(c);
1159         assert(ret);
1160
1161         our_env = new0(char*, 10);
1162         if (!our_env)
1163                 return -ENOMEM;
1164
1165         if (n_fds > 0) {
1166                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1167                         return -ENOMEM;
1168                 our_env[n_env++] = x;
1169
1170                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1171                         return -ENOMEM;
1172                 our_env[n_env++] = x;
1173         }
1174
1175         if (watchdog_usec > 0) {
1176                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1177                         return -ENOMEM;
1178                 our_env[n_env++] = x;
1179
1180                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1181                         return -ENOMEM;
1182                 our_env[n_env++] = x;
1183         }
1184
1185         if (home) {
1186                 x = strappend("HOME=", home);
1187                 if (!x)
1188                         return -ENOMEM;
1189                 our_env[n_env++] = x;
1190         }
1191
1192         if (username) {
1193                 x = strappend("LOGNAME=", username);
1194                 if (!x)
1195                         return -ENOMEM;
1196                 our_env[n_env++] = x;
1197
1198                 x = strappend("USER=", username);
1199                 if (!x)
1200                         return -ENOMEM;
1201                 our_env[n_env++] = x;
1202         }
1203
1204         if (shell) {
1205                 x = strappend("SHELL=", shell);
1206                 if (!x)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209         }
1210
1211         if (is_terminal_input(c->std_input) ||
1212             c->std_output == EXEC_OUTPUT_TTY ||
1213             c->std_error == EXEC_OUTPUT_TTY ||
1214             c->tty_path) {
1215
1216                 x = strdup(default_term_for_tty(tty_path(c)));
1217                 if (!x)
1218                         return -ENOMEM;
1219                 our_env[n_env++] = x;
1220         }
1221
1222         our_env[n_env++] = NULL;
1223         assert(n_env <= 10);
1224
1225         *ret = our_env;
1226         our_env = NULL;
1227
1228         return 0;
1229 }
1230
1231 static int exec_child(ExecCommand *command,
1232                       const ExecContext *context,
1233                       const ExecParameters *params,
1234                       ExecRuntime *runtime,
1235                       char **argv,
1236                       int socket_fd,
1237                       int *fds, unsigned n_fds,
1238                       char **files_env,
1239                       int *error) {
1240
1241         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1242         _cleanup_free_ char *mac_selinux_context_net = NULL;
1243         const char *username = NULL, *home = NULL, *shell = NULL;
1244         unsigned n_dont_close = 0;
1245         int dont_close[n_fds + 4];
1246         uid_t uid = UID_INVALID;
1247         gid_t gid = GID_INVALID;
1248         int i, err;
1249
1250         assert(command);
1251         assert(context);
1252         assert(params);
1253         assert(error);
1254
1255         rename_process_from_path(command->path);
1256
1257         /* We reset exactly these signals, since they are the
1258          * only ones we set to SIG_IGN in the main daemon. All
1259          * others we leave untouched because we set them to
1260          * SIG_DFL or a valid handler initially, both of which
1261          * will be demoted to SIG_DFL. */
1262         default_signals(SIGNALS_CRASH_HANDLER,
1263                         SIGNALS_IGNORE, -1);
1264
1265         if (context->ignore_sigpipe)
1266                 ignore_signals(SIGPIPE, -1);
1267
1268         err = reset_signal_mask();
1269         if (err < 0) {
1270                 *error = EXIT_SIGNAL_MASK;
1271                 return err;
1272         }
1273
1274         if (params->idle_pipe)
1275                 do_idle_pipe_dance(params->idle_pipe);
1276
1277         /* Close sockets very early to make sure we don't
1278          * block init reexecution because it cannot bind its
1279          * sockets */
1280         log_forget_fds();
1281
1282         if (socket_fd >= 0)
1283                 dont_close[n_dont_close++] = socket_fd;
1284         if (n_fds > 0) {
1285                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1286                 n_dont_close += n_fds;
1287         }
1288         if (params->bus_endpoint_fd >= 0)
1289                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1290         if (runtime) {
1291                 if (runtime->netns_storage_socket[0] >= 0)
1292                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1293                 if (runtime->netns_storage_socket[1] >= 0)
1294                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1295         }
1296
1297         err = close_all_fds(dont_close, n_dont_close);
1298         if (err < 0) {
1299                 *error = EXIT_FDS;
1300                 return err;
1301         }
1302
1303         if (!context->same_pgrp)
1304                 if (setsid() < 0) {
1305                         *error = EXIT_SETSID;
1306                         return -errno;
1307                 }
1308
1309         exec_context_tty_reset(context);
1310
1311         if (params->confirm_spawn) {
1312                 char response;
1313
1314                 err = ask_for_confirmation(&response, argv);
1315                 if (err == -ETIMEDOUT)
1316                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1317                 else if (err < 0)
1318                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1319                 else if (response == 's') {
1320                         write_confirm_message("Skipping execution.\n");
1321                         *error = EXIT_CONFIRM;
1322                         return -ECANCELED;
1323                 } else if (response == 'n') {
1324                         write_confirm_message("Failing execution.\n");
1325                         *error = 0;
1326                         return 0;
1327                 }
1328         }
1329
1330         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1331          * must sure to drop O_NONBLOCK */
1332         if (socket_fd >= 0)
1333                 fd_nonblock(socket_fd, false);
1334
1335         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1336         if (err < 0) {
1337                 *error = EXIT_STDIN;
1338                 return err;
1339         }
1340
1341         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1342         if (err < 0) {
1343                 *error = EXIT_STDOUT;
1344                 return err;
1345         }
1346
1347         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1348         if (err < 0) {
1349                 *error = EXIT_STDERR;
1350                 return err;
1351         }
1352
1353         if (params->cgroup_path) {
1354                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1355                 if (err < 0) {
1356                         *error = EXIT_CGROUP;
1357                         return err;
1358                 }
1359         }
1360
1361         if (context->oom_score_adjust_set) {
1362                 char t[16];
1363
1364                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1365                 char_array_0(t);
1366
1367                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1368                         *error = EXIT_OOM_ADJUST;
1369                         return -errno;
1370                 }
1371         }
1372
1373         if (context->nice_set)
1374                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1375                         *error = EXIT_NICE;
1376                         return -errno;
1377                 }
1378
1379         if (context->cpu_sched_set) {
1380                 struct sched_param param = {
1381                         .sched_priority = context->cpu_sched_priority,
1382                 };
1383
1384                 err = sched_setscheduler(0,
1385                                          context->cpu_sched_policy |
1386                                          (context->cpu_sched_reset_on_fork ?
1387                                           SCHED_RESET_ON_FORK : 0),
1388                                          &param);
1389                 if (err < 0) {
1390                         *error = EXIT_SETSCHEDULER;
1391                         return -errno;
1392                 }
1393         }
1394
1395         if (context->cpuset)
1396                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1397                         *error = EXIT_CPUAFFINITY;
1398                         return -errno;
1399                 }
1400
1401         if (context->ioprio_set)
1402                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1403                         *error = EXIT_IOPRIO;
1404                         return -errno;
1405                 }
1406
1407         if (context->timer_slack_nsec != NSEC_INFINITY)
1408                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1409                         *error = EXIT_TIMERSLACK;
1410                         return -errno;
1411                 }
1412
1413         if (context->personality != 0xffffffffUL)
1414                 if (personality(context->personality) < 0) {
1415                         *error = EXIT_PERSONALITY;
1416                         return -errno;
1417                 }
1418
1419         if (context->utmp_id)
1420                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1421
1422         if (context->user) {
1423                 username = context->user;
1424                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1425                 if (err < 0) {
1426                         *error = EXIT_USER;
1427                         return err;
1428                 }
1429
1430                 if (is_terminal_input(context->std_input)) {
1431                         err = chown_terminal(STDIN_FILENO, uid);
1432                         if (err < 0) {
1433                                 *error = EXIT_STDIN;
1434                                 return err;
1435                         }
1436                 }
1437         }
1438
1439 #ifdef ENABLE_KDBUS
1440         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1441                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1442
1443                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1444                 if (err < 0) {
1445                         *error = EXIT_BUS_ENDPOINT;
1446                         return err;
1447                 }
1448         }
1449 #endif
1450
1451         /* If delegation is enabled we'll pass ownership of the cgroup
1452          * (but only in systemd's own controller hierarchy!) to the
1453          * user of the new process. */
1454         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1455                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1456                 if (err < 0) {
1457                         *error = EXIT_CGROUP;
1458                         return err;
1459                 }
1460
1461
1462                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1463                 if (err < 0) {
1464                         *error = EXIT_CGROUP;
1465                         return err;
1466                 }
1467         }
1468
1469         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1470                 char **rt;
1471
1472                 STRV_FOREACH(rt, context->runtime_directory) {
1473                         _cleanup_free_ char *p;
1474
1475                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1476                         if (!p) {
1477                                 *error = EXIT_RUNTIME_DIRECTORY;
1478                                 return -ENOMEM;
1479                         }
1480
1481                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1482                         if (err < 0) {
1483                                 *error = EXIT_RUNTIME_DIRECTORY;
1484                                 return err;
1485                         }
1486                 }
1487         }
1488
1489         if (params->apply_permissions) {
1490                 err = enforce_groups(context, username, gid);
1491                 if (err < 0) {
1492                         *error = EXIT_GROUP;
1493                         return err;
1494                 }
1495         }
1496
1497         umask(context->umask);
1498
1499 #ifdef HAVE_PAM
1500         if (params->apply_permissions && context->pam_name && username) {
1501                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1502                 if (err < 0) {
1503                         *error = EXIT_PAM;
1504                         return err;
1505                 }
1506         }
1507 #endif
1508
1509         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1510                 err = setup_netns(runtime->netns_storage_socket);
1511                 if (err < 0) {
1512                         *error = EXIT_NETWORK;
1513                         return err;
1514                 }
1515         }
1516
1517         if (!strv_isempty(context->read_write_dirs) ||
1518             !strv_isempty(context->read_only_dirs) ||
1519             !strv_isempty(context->inaccessible_dirs) ||
1520             context->mount_flags != 0 ||
1521             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1522             params->bus_endpoint_path ||
1523             context->private_devices ||
1524             context->protect_system != PROTECT_SYSTEM_NO ||
1525             context->protect_home != PROTECT_HOME_NO) {
1526
1527                 char *tmp = NULL, *var = NULL;
1528
1529                 /* The runtime struct only contains the parent
1530                  * of the private /tmp, which is
1531                  * non-accessible to world users. Inside of it
1532                  * there's a /tmp that is sticky, and that's
1533                  * the one we want to use here. */
1534
1535                 if (context->private_tmp && runtime) {
1536                         if (runtime->tmp_dir)
1537                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1538                         if (runtime->var_tmp_dir)
1539                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1540                 }
1541
1542                 err = setup_namespace(
1543                                 context->read_write_dirs,
1544                                 context->read_only_dirs,
1545                                 context->inaccessible_dirs,
1546                                 tmp,
1547                                 var,
1548                                 params->bus_endpoint_path,
1549                                 context->private_devices,
1550                                 context->protect_home,
1551                                 context->protect_system,
1552                                 context->mount_flags);
1553
1554                 if (err == -EPERM)
1555                         log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1556                 else if (err < 0) {
1557                         *error = EXIT_NAMESPACE;
1558                         return err;
1559                 }
1560         }
1561
1562         if (params->apply_chroot) {
1563                 if (context->root_directory)
1564                         if (chroot(context->root_directory) < 0) {
1565                                 *error = EXIT_CHROOT;
1566                                 return -errno;
1567                         }
1568
1569                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1570                         *error = EXIT_CHDIR;
1571                         return -errno;
1572                 }
1573         } else {
1574                 _cleanup_free_ char *d = NULL;
1575
1576                 if (asprintf(&d, "%s/%s",
1577                              context->root_directory ? context->root_directory : "",
1578                              context->working_directory ? context->working_directory : "") < 0) {
1579                         *error = EXIT_MEMORY;
1580                         return -ENOMEM;
1581                 }
1582
1583                 if (chdir(d) < 0) {
1584                         *error = EXIT_CHDIR;
1585                         return -errno;
1586                 }
1587         }
1588
1589 #ifdef HAVE_SELINUX
1590         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1591                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1592                 if (err < 0) {
1593                         *error = EXIT_SELINUX_CONTEXT;
1594                         return err;
1595                 }
1596         }
1597 #endif
1598
1599         /* We repeat the fd closing here, to make sure that
1600          * nothing is leaked from the PAM modules. Note that
1601          * we are more aggressive this time since socket_fd
1602          * and the netns fds we don't need anymore. The custom
1603          * endpoint fd was needed to upload the policy and can
1604          * now be closed as well. */
1605         err = close_all_fds(fds, n_fds);
1606         if (err >= 0)
1607                 err = shift_fds(fds, n_fds);
1608         if (err >= 0)
1609                 err = flags_fds(fds, n_fds, context->non_blocking);
1610         if (err < 0) {
1611                 *error = EXIT_FDS;
1612                 return err;
1613         }
1614
1615         if (params->apply_permissions) {
1616
1617                 for (i = 0; i < _RLIMIT_MAX; i++) {
1618                         if (!context->rlimit[i])
1619                                 continue;
1620
1621                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1622                                 *error = EXIT_LIMITS;
1623                                 return -errno;
1624                         }
1625                 }
1626
1627                 if (context->capability_bounding_set_drop) {
1628                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1629                         if (err < 0) {
1630                                 *error = EXIT_CAPABILITIES;
1631                                 return err;
1632                         }
1633                 }
1634
1635 #ifdef HAVE_SMACK
1636                 if (context->smack_process_label) {
1637                         err = mac_smack_apply_pid(0, context->smack_process_label);
1638                         if (err < 0) {
1639                                 *error = EXIT_SMACK_PROCESS_LABEL;
1640                                 return err;
1641                         }
1642                 }
1643 #endif
1644
1645                 if (context->user) {
1646                         err = enforce_user(context, uid);
1647                         if (err < 0) {
1648                                 *error = EXIT_USER;
1649                                 return err;
1650                         }
1651                 }
1652
1653                 /* PR_GET_SECUREBITS is not privileged, while
1654                  * PR_SET_SECUREBITS is. So to suppress
1655                  * potential EPERMs we'll try not to call
1656                  * PR_SET_SECUREBITS unless necessary. */
1657                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1658                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1659                                 *error = EXIT_SECUREBITS;
1660                                 return -errno;
1661                         }
1662
1663                 if (context->capabilities)
1664                         if (cap_set_proc(context->capabilities) < 0) {
1665                                 *error = EXIT_CAPABILITIES;
1666                                 return -errno;
1667                         }
1668
1669                 if (context->no_new_privileges)
1670                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1671                                 *error = EXIT_NO_NEW_PRIVILEGES;
1672                                 return -errno;
1673                         }
1674
1675 #ifdef HAVE_SECCOMP
1676                 if (context->address_families_whitelist ||
1677                     !set_isempty(context->address_families)) {
1678                         err = apply_address_families(context);
1679                         if (err < 0) {
1680                                 *error = EXIT_ADDRESS_FAMILIES;
1681                                 return err;
1682                         }
1683                 }
1684
1685                 if (context->syscall_whitelist ||
1686                     !set_isempty(context->syscall_filter) ||
1687                     !set_isempty(context->syscall_archs)) {
1688                         err = apply_seccomp(context);
1689                         if (err < 0) {
1690                                 *error = EXIT_SECCOMP;
1691                                 return err;
1692                         }
1693                 }
1694 #endif
1695
1696 #ifdef HAVE_SELINUX
1697                 if (mac_selinux_use()) {
1698                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1699
1700                         if (exec_context) {
1701                                 err = setexeccon(exec_context);
1702                                 if (err < 0) {
1703                                         *error = EXIT_SELINUX_CONTEXT;
1704                                         return err;
1705                                 }
1706                         }
1707                 }
1708 #endif
1709
1710 #ifdef HAVE_APPARMOR
1711                 if (context->apparmor_profile && mac_apparmor_use()) {
1712                         err = aa_change_onexec(context->apparmor_profile);
1713                         if (err < 0 && !context->apparmor_profile_ignore) {
1714                                 *error = EXIT_APPARMOR_PROFILE;
1715                                 return -errno;
1716                         }
1717                 }
1718 #endif
1719         }
1720
1721         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1722         if (err < 0) {
1723                 *error = EXIT_MEMORY;
1724                 return err;
1725         }
1726
1727         final_env = strv_env_merge(5,
1728                                    params->environment,
1729                                    our_env,
1730                                    context->environment,
1731                                    files_env,
1732                                    pam_env,
1733                                    NULL);
1734         if (!final_env) {
1735                 *error = EXIT_MEMORY;
1736                 return -ENOMEM;
1737         }
1738
1739         final_argv = replace_env_argv(argv, final_env);
1740         if (!final_argv) {
1741                 *error = EXIT_MEMORY;
1742                 return -ENOMEM;
1743         }
1744
1745         final_env = strv_env_clean(final_env);
1746
1747         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1748                 _cleanup_free_ char *line;
1749
1750                 line = exec_command_line(final_argv);
1751                 if (line) {
1752                         log_open();
1753                         log_unit_struct(params->unit_id,
1754                                         LOG_DEBUG,
1755                                         "EXECUTABLE=%s", command->path,
1756                                         LOG_MESSAGE("Executing: %s", line),
1757                                         NULL);
1758                         log_close();
1759                 }
1760         }
1761         execve(command->path, final_argv, final_env);
1762         *error = EXIT_EXEC;
1763         return -errno;
1764 }
1765
1766 int exec_spawn(ExecCommand *command,
1767                const ExecContext *context,
1768                const ExecParameters *params,
1769                ExecRuntime *runtime,
1770                pid_t *ret) {
1771
1772         _cleanup_strv_free_ char **files_env = NULL;
1773         int *fds = NULL; unsigned n_fds = 0;
1774         char *line, **argv;
1775         int socket_fd;
1776         pid_t pid;
1777         int err;
1778
1779         assert(command);
1780         assert(context);
1781         assert(ret);
1782         assert(params);
1783         assert(params->fds || params->n_fds <= 0);
1784
1785         if (context->std_input == EXEC_INPUT_SOCKET ||
1786             context->std_output == EXEC_OUTPUT_SOCKET ||
1787             context->std_error == EXEC_OUTPUT_SOCKET) {
1788
1789                 if (params->n_fds != 1)
1790                         return -EINVAL;
1791
1792                 socket_fd = params->fds[0];
1793         } else {
1794                 socket_fd = -1;
1795                 fds = params->fds;
1796                 n_fds = params->n_fds;
1797         }
1798
1799         err = exec_context_load_environment(context, params->unit_id, &files_env);
1800         if (err < 0) {
1801                 log_unit_struct(params->unit_id,
1802                                 LOG_ERR,
1803                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1804                                 LOG_ERRNO(-err),
1805                                 NULL);
1806                 return err;
1807         }
1808
1809         argv = params->argv ?: command->argv;
1810
1811         line = exec_command_line(argv);
1812         if (!line)
1813                 return log_oom();
1814
1815         log_unit_struct(params->unit_id,
1816                         LOG_DEBUG,
1817                         "EXECUTABLE=%s", command->path,
1818                         LOG_MESSAGE("About to execute: %s", line),
1819                         NULL);
1820         free(line);
1821
1822         pid = fork();
1823         if (pid < 0)
1824                 return -errno;
1825
1826         if (pid == 0) {
1827                 int r;
1828
1829                 err = exec_child(command,
1830                                  context,
1831                                  params,
1832                                  runtime,
1833                                  argv,
1834                                  socket_fd,
1835                                  fds, n_fds,
1836                                  files_env,
1837                                  &r);
1838                 if (r != 0) {
1839                         log_open();
1840                         log_struct(LOG_ERR,
1841                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842                                    "EXECUTABLE=%s", command->path,
1843                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1844                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845                                                command->path, strerror(-err)),
1846                                    LOG_ERRNO(-err),
1847                                    NULL);
1848                         log_close();
1849                 }
1850
1851                 _exit(r);
1852         }
1853
1854         log_unit_struct(params->unit_id,
1855                         LOG_DEBUG,
1856                         LOG_MESSAGE("Forked %s as "PID_FMT,
1857                                     command->path, pid),
1858                         NULL);
1859
1860         /* We add the new process to the cgroup both in the child (so
1861          * that we can be sure that no user code is ever executed
1862          * outside of the cgroup) and in the parent (so that we can be
1863          * sure that when we kill the cgroup the process will be
1864          * killed too). */
1865         if (params->cgroup_path)
1866                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1867
1868         exec_status_start(&command->exec_status, pid);
1869
1870         *ret = pid;
1871         return 0;
1872 }
1873
1874 void exec_context_init(ExecContext *c) {
1875         assert(c);
1876
1877         c->umask = 0022;
1878         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879         c->cpu_sched_policy = SCHED_OTHER;
1880         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881         c->syslog_level_prefix = true;
1882         c->ignore_sigpipe = true;
1883         c->timer_slack_nsec = NSEC_INFINITY;
1884         c->personality = 0xffffffffUL;
1885         c->runtime_directory_mode = 0755;
1886 }
1887
1888 void exec_context_done(ExecContext *c) {
1889         unsigned l;
1890
1891         assert(c);
1892
1893         strv_free(c->environment);
1894         c->environment = NULL;
1895
1896         strv_free(c->environment_files);
1897         c->environment_files = NULL;
1898
1899         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1900                 free(c->rlimit[l]);
1901                 c->rlimit[l] = NULL;
1902         }
1903
1904         free(c->working_directory);
1905         c->working_directory = NULL;
1906         free(c->root_directory);
1907         c->root_directory = NULL;
1908
1909         free(c->tty_path);
1910         c->tty_path = NULL;
1911
1912         free(c->syslog_identifier);
1913         c->syslog_identifier = NULL;
1914
1915         free(c->user);
1916         c->user = NULL;
1917
1918         free(c->group);
1919         c->group = NULL;
1920
1921         strv_free(c->supplementary_groups);
1922         c->supplementary_groups = NULL;
1923
1924         free(c->pam_name);
1925         c->pam_name = NULL;
1926
1927         if (c->capabilities) {
1928                 cap_free(c->capabilities);
1929                 c->capabilities = NULL;
1930         }
1931
1932         strv_free(c->read_only_dirs);
1933         c->read_only_dirs = NULL;
1934
1935         strv_free(c->read_write_dirs);
1936         c->read_write_dirs = NULL;
1937
1938         strv_free(c->inaccessible_dirs);
1939         c->inaccessible_dirs = NULL;
1940
1941         if (c->cpuset)
1942                 CPU_FREE(c->cpuset);
1943
1944         free(c->utmp_id);
1945         c->utmp_id = NULL;
1946
1947         free(c->selinux_context);
1948         c->selinux_context = NULL;
1949
1950         free(c->apparmor_profile);
1951         c->apparmor_profile = NULL;
1952
1953         set_free(c->syscall_filter);
1954         c->syscall_filter = NULL;
1955
1956         set_free(c->syscall_archs);
1957         c->syscall_archs = NULL;
1958
1959         set_free(c->address_families);
1960         c->address_families = NULL;
1961
1962         strv_free(c->runtime_directory);
1963         c->runtime_directory = NULL;
1964
1965         bus_endpoint_free(c->bus_endpoint);
1966         c->bus_endpoint = NULL;
1967 }
1968
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1970         char **i;
1971
1972         assert(c);
1973
1974         if (!runtime_prefix)
1975                 return 0;
1976
1977         STRV_FOREACH(i, c->runtime_directory) {
1978                 _cleanup_free_ char *p;
1979
1980                 p = strjoin(runtime_prefix, "/", *i, NULL);
1981                 if (!p)
1982                         return -ENOMEM;
1983
1984                 /* We execute this synchronously, since we need to be
1985                  * sure this is gone when we start the service
1986                  * next. */
1987                 rm_rf(p, false, true, false);
1988         }
1989
1990         return 0;
1991 }
1992
1993 void exec_command_done(ExecCommand *c) {
1994         assert(c);
1995
1996         free(c->path);
1997         c->path = NULL;
1998
1999         strv_free(c->argv);
2000         c->argv = NULL;
2001 }
2002
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2004         unsigned i;
2005
2006         for (i = 0; i < n; i++)
2007                 exec_command_done(c+i);
2008 }
2009
2010 ExecCommand* exec_command_free_list(ExecCommand *c) {
2011         ExecCommand *i;
2012
2013         while ((i = c)) {
2014                 LIST_REMOVE(command, c, i);
2015                 exec_command_done(i);
2016                 free(i);
2017         }
2018
2019         return NULL;
2020 }
2021
2022 void exec_command_free_array(ExecCommand **c, unsigned n) {
2023         unsigned i;
2024
2025         for (i = 0; i < n; i++)
2026                 c[i] = exec_command_free_list(c[i]);
2027 }
2028
/* Context handed as userdata to invalid_env(), so that the log message
 * about a rejected environment assignment can name its origin. */
typedef struct InvalidEnvInfo {
        const char *unit_id; /* unit the message is logged for */
        const char *path;    /* environment file the assignment was read from */
} InvalidEnvInfo;
2033
2034 static void invalid_env(const char *p, void *userdata) {
2035         InvalidEnvInfo *info = userdata;
2036
2037         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2038 }
2039
2040 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2041         char **i, **r = NULL;
2042
2043         assert(c);
2044         assert(l);
2045
2046         STRV_FOREACH(i, c->environment_files) {
2047                 char *fn;
2048                 int k;
2049                 bool ignore = false;
2050                 char **p;
2051                 _cleanup_globfree_ glob_t pglob = {};
2052                 int count, n;
2053
2054                 fn = *i;
2055
2056                 if (fn[0] == '-') {
2057                         ignore = true;
2058                         fn ++;
2059                 }
2060
2061                 if (!path_is_absolute(fn)) {
2062                         if (ignore)
2063                                 continue;
2064
2065                         strv_free(r);
2066                         return -EINVAL;
2067                 }
2068
2069                 /* Filename supports globbing, take all matching files */
2070                 errno = 0;
2071                 if (glob(fn, 0, NULL, &pglob) != 0) {
2072                         if (ignore)
2073                                 continue;
2074
2075                         strv_free(r);
2076                         return errno ? -errno : -EINVAL;
2077                 }
2078                 count = pglob.gl_pathc;
2079                 if (count == 0) {
2080                         if (ignore)
2081                                 continue;
2082
2083                         strv_free(r);
2084                         return -EINVAL;
2085                 }
2086                 for (n = 0; n < count; n++) {
2087                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2088                         if (k < 0) {
2089                                 if (ignore)
2090                                         continue;
2091
2092                                 strv_free(r);
2093                                 return k;
2094                         }
2095                         /* Log invalid environment variables with filename */
2096                         if (p) {
2097                                 InvalidEnvInfo info = {
2098                                         .unit_id = unit_id,
2099                                         .path = pglob.gl_pathv[n]
2100                                 };
2101
2102                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2103                         }
2104
2105                         if (r == NULL)
2106                                 r = p;
2107                         else {
2108                                 char **m;
2109
2110                                 m = strv_env_merge(2, r, p);
2111                                 strv_free(r);
2112                                 strv_free(p);
2113                                 if (!m)
2114                                         return -ENOMEM;
2115
2116                                 r = m;
2117                         }
2118                 }
2119         }
2120
2121         *l = r;
2122
2123         return 0;
2124 }
2125
2126 static bool tty_may_match_dev_console(const char *tty) {
2127         _cleanup_free_ char *active = NULL;
2128        char *console;
2129
2130         if (startswith(tty, "/dev/"))
2131                 tty += 5;
2132
2133         /* trivial identity? */
2134         if (streq(tty, "console"))
2135                 return true;
2136
2137         console = resolve_dev_console(&active);
2138         /* if we could not resolve, assume it may */
2139         if (!console)
2140                 return true;
2141
2142         /* "tty0" means the active VC, so it may be the same sometimes */
2143         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2144 }
2145
2146 bool exec_context_may_touch_console(ExecContext *ec) {
2147         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2148                 is_terminal_input(ec->std_input) ||
2149                 is_terminal_output(ec->std_output) ||
2150                 is_terminal_output(ec->std_error)) &&
2151                tty_may_match_dev_console(tty_path(ec));
2152 }
2153
2154 static void strv_fprintf(FILE *f, char **l) {
2155         char **g;
2156
2157         assert(f);
2158
2159         STRV_FOREACH(g, l)
2160                 fprintf(f, " %s", *g);
2161 }
2162
2163 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2164         char **e;
2165         unsigned i;
2166
2167         assert(c);
2168         assert(f);
2169
2170         prefix = strempty(prefix);
2171
2172         fprintf(f,
2173                 "%sUMask: %04o\n"
2174                 "%sWorkingDirectory: %s\n"
2175                 "%sRootDirectory: %s\n"
2176                 "%sNonBlocking: %s\n"
2177                 "%sPrivateTmp: %s\n"
2178                 "%sPrivateNetwork: %s\n"
2179                 "%sPrivateDevices: %s\n"
2180                 "%sProtectHome: %s\n"
2181                 "%sProtectSystem: %s\n"
2182                 "%sIgnoreSIGPIPE: %s\n",
2183                 prefix, c->umask,
2184                 prefix, c->working_directory ? c->working_directory : "/",
2185                 prefix, c->root_directory ? c->root_directory : "/",
2186                 prefix, yes_no(c->non_blocking),
2187                 prefix, yes_no(c->private_tmp),
2188                 prefix, yes_no(c->private_network),
2189                 prefix, yes_no(c->private_devices),
2190                 prefix, protect_home_to_string(c->protect_home),
2191                 prefix, protect_system_to_string(c->protect_system),
2192                 prefix, yes_no(c->ignore_sigpipe));
2193
2194         STRV_FOREACH(e, c->environment)
2195                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2196
2197         STRV_FOREACH(e, c->environment_files)
2198                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2199
2200         if (c->nice_set)
2201                 fprintf(f,
2202                         "%sNice: %i\n",
2203                         prefix, c->nice);
2204
2205         if (c->oom_score_adjust_set)
2206                 fprintf(f,
2207                         "%sOOMScoreAdjust: %i\n",
2208                         prefix, c->oom_score_adjust);
2209
2210         for (i = 0; i < RLIM_NLIMITS; i++)
2211                 if (c->rlimit[i])
2212                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2213                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2214
2215         if (c->ioprio_set) {
2216                 _cleanup_free_ char *class_str = NULL;
2217
2218                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2219                 fprintf(f,
2220                         "%sIOSchedulingClass: %s\n"
2221                         "%sIOPriority: %i\n",
2222                         prefix, strna(class_str),
2223                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2224         }
2225
2226         if (c->cpu_sched_set) {
2227                 _cleanup_free_ char *policy_str = NULL;
2228
2229                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2230                 fprintf(f,
2231                         "%sCPUSchedulingPolicy: %s\n"
2232                         "%sCPUSchedulingPriority: %i\n"
2233                         "%sCPUSchedulingResetOnFork: %s\n",
2234                         prefix, strna(policy_str),
2235                         prefix, c->cpu_sched_priority,
2236                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2237         }
2238
2239         if (c->cpuset) {
2240                 fprintf(f, "%sCPUAffinity:", prefix);
2241                 for (i = 0; i < c->cpuset_ncpus; i++)
2242                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2243                                 fprintf(f, " %u", i);
2244                 fputs("\n", f);
2245         }
2246
2247         if (c->timer_slack_nsec != NSEC_INFINITY)
2248                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2249
2250         fprintf(f,
2251                 "%sStandardInput: %s\n"
2252                 "%sStandardOutput: %s\n"
2253                 "%sStandardError: %s\n",
2254                 prefix, exec_input_to_string(c->std_input),
2255                 prefix, exec_output_to_string(c->std_output),
2256                 prefix, exec_output_to_string(c->std_error));
2257
2258         if (c->tty_path)
2259                 fprintf(f,
2260                         "%sTTYPath: %s\n"
2261                         "%sTTYReset: %s\n"
2262                         "%sTTYVHangup: %s\n"
2263                         "%sTTYVTDisallocate: %s\n",
2264                         prefix, c->tty_path,
2265                         prefix, yes_no(c->tty_reset),
2266                         prefix, yes_no(c->tty_vhangup),
2267                         prefix, yes_no(c->tty_vt_disallocate));
2268
2269         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2270             c->std_output == EXEC_OUTPUT_KMSG ||
2271             c->std_output == EXEC_OUTPUT_JOURNAL ||
2272             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2273             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2274             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2275             c->std_error == EXEC_OUTPUT_SYSLOG ||
2276             c->std_error == EXEC_OUTPUT_KMSG ||
2277             c->std_error == EXEC_OUTPUT_JOURNAL ||
2278             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2279             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2280             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2281
2282                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2283
2284                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2285                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2286
2287                 fprintf(f,
2288                         "%sSyslogFacility: %s\n"
2289                         "%sSyslogLevel: %s\n",
2290                         prefix, strna(fac_str),
2291                         prefix, strna(lvl_str));
2292         }
2293
2294         if (c->capabilities) {
2295                 _cleanup_cap_free_charp_ char *t;
2296
2297                 t = cap_to_text(c->capabilities, NULL);
2298                 if (t)
2299                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2300         }
2301
2302         if (c->secure_bits)
2303                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2304                         prefix,
2305                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2306                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2307                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2308                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2309                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2310                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2311
2312         if (c->capability_bounding_set_drop) {
2313                 unsigned long l;
2314                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2315
2316                 for (l = 0; l <= cap_last_cap(); l++)
2317                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2318                                 fprintf(f, " %s", strna(capability_to_name(l)));
2319
2320                 fputs("\n", f);
2321         }
2322
2323         if (c->user)
2324                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2325         if (c->group)
2326                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2327
2328         if (strv_length(c->supplementary_groups) > 0) {
2329                 fprintf(f, "%sSupplementaryGroups:", prefix);
2330                 strv_fprintf(f, c->supplementary_groups);
2331                 fputs("\n", f);
2332         }
2333
2334         if (c->pam_name)
2335                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2336
2337         if (strv_length(c->read_write_dirs) > 0) {
2338                 fprintf(f, "%sReadWriteDirs:", prefix);
2339                 strv_fprintf(f, c->read_write_dirs);
2340                 fputs("\n", f);
2341         }
2342
2343         if (strv_length(c->read_only_dirs) > 0) {
2344                 fprintf(f, "%sReadOnlyDirs:", prefix);
2345                 strv_fprintf(f, c->read_only_dirs);
2346                 fputs("\n", f);
2347         }
2348
2349         if (strv_length(c->inaccessible_dirs) > 0) {
2350                 fprintf(f, "%sInaccessibleDirs:", prefix);
2351                 strv_fprintf(f, c->inaccessible_dirs);
2352                 fputs("\n", f);
2353         }
2354
2355         if (c->utmp_id)
2356                 fprintf(f,
2357                         "%sUtmpIdentifier: %s\n",
2358                         prefix, c->utmp_id);
2359
2360         if (c->selinux_context)
2361                 fprintf(f,
2362                         "%sSELinuxContext: %s%s\n",
2363                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2364
2365         if (c->personality != 0xffffffffUL)
2366                 fprintf(f,
2367                         "%sPersonality: %s\n",
2368                         prefix, strna(personality_to_string(c->personality)));
2369
2370         if (c->syscall_filter) {
2371 #ifdef HAVE_SECCOMP
2372                 Iterator j;
2373                 void *id;
2374                 bool first = true;
2375 #endif
2376
2377                 fprintf(f,
2378                         "%sSystemCallFilter: ",
2379                         prefix);
2380
2381                 if (!c->syscall_whitelist)
2382                         fputc('~', f);
2383
2384 #ifdef HAVE_SECCOMP
2385                 SET_FOREACH(id, c->syscall_filter, j) {
2386                         _cleanup_free_ char *name = NULL;
2387
2388                         if (first)
2389                                 first = false;
2390                         else
2391                                 fputc(' ', f);
2392
2393                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2394                         fputs(strna(name), f);
2395                 }
2396 #endif
2397
2398                 fputc('\n', f);
2399         }
2400
2401         if (c->syscall_archs) {
2402 #ifdef HAVE_SECCOMP
2403                 Iterator j;
2404                 void *id;
2405 #endif
2406
2407                 fprintf(f,
2408                         "%sSystemCallArchitectures:",
2409                         prefix);
2410
2411 #ifdef HAVE_SECCOMP
2412                 SET_FOREACH(id, c->syscall_archs, j)
2413                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2414 #endif
2415                 fputc('\n', f);
2416         }
2417
2418         if (c->syscall_errno != 0)
2419                 fprintf(f,
2420                         "%sSystemCallErrorNumber: %s\n",
2421                         prefix, strna(errno_to_name(c->syscall_errno)));
2422
2423         if (c->apparmor_profile)
2424                 fprintf(f,
2425                         "%sAppArmorProfile: %s%s\n",
2426                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2427 }
2428
2429 bool exec_context_maintains_privileges(ExecContext *c) {
2430         assert(c);
2431
2432         /* Returns true if the process forked off would run run under
2433          * an unchanged UID or as root. */
2434
2435         if (!c->user)
2436                 return true;
2437
2438         if (streq(c->user, "root") || streq(c->user, "0"))
2439                 return true;
2440
2441         return false;
2442 }
2443
2444 void exec_status_start(ExecStatus *s, pid_t pid) {
2445         assert(s);
2446
2447         zero(*s);
2448         s->pid = pid;
2449         dual_timestamp_get(&s->start_timestamp);
2450 }
2451
2452 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2453         assert(s);
2454
2455         if (s->pid && s->pid != pid)
2456                 zero(*s);
2457
2458         s->pid = pid;
2459         dual_timestamp_get(&s->exit_timestamp);
2460
2461         s->code = code;
2462         s->status = status;
2463
2464         if (context) {
2465                 if (context->utmp_id)
2466                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2467
2468                 exec_context_tty_reset(context);
2469         }
2470 }
2471
2472 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2473         char buf[FORMAT_TIMESTAMP_MAX];
2474
2475         assert(s);
2476         assert(f);
2477
2478         if (s->pid <= 0)
2479                 return;
2480
2481         prefix = strempty(prefix);
2482
2483         fprintf(f,
2484                 "%sPID: "PID_FMT"\n",
2485                 prefix, s->pid);
2486
2487         if (s->start_timestamp.realtime > 0)
2488                 fprintf(f,
2489                         "%sStart Timestamp: %s\n",
2490                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2491
2492         if (s->exit_timestamp.realtime > 0)
2493                 fprintf(f,
2494                         "%sExit Timestamp: %s\n"
2495                         "%sExit Code: %s\n"
2496                         "%sExit Status: %i\n",
2497                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2498                         prefix, sigchld_code_to_string(s->code),
2499                         prefix, s->status);
2500 }
2501
2502 char *exec_command_line(char **argv) {
2503         size_t k;
2504         char *n, *p, **a;
2505         bool first = true;
2506
2507         assert(argv);
2508
2509         k = 1;
2510         STRV_FOREACH(a, argv)
2511                 k += strlen(*a)+3;
2512
2513         if (!(n = new(char, k)))
2514                 return NULL;
2515
2516         p = n;
2517         STRV_FOREACH(a, argv) {
2518
2519                 if (!first)
2520                         *(p++) = ' ';
2521                 else
2522                         first = false;
2523
2524                 if (strpbrk(*a, WHITESPACE)) {
2525                         *(p++) = '\'';
2526                         p = stpcpy(p, *a);
2527                         *(p++) = '\'';
2528                 } else
2529                         p = stpcpy(p, *a);
2530
2531         }
2532
2533         *p = 0;
2534
2535         /* FIXME: this doesn't really handle arguments that have
2536          * spaces and ticks in them */
2537
2538         return n;
2539 }
2540
2541 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2542         _cleanup_free_ char *cmd = NULL;
2543         const char *prefix2;
2544
2545         assert(c);
2546         assert(f);
2547
2548         prefix = strempty(prefix);
2549         prefix2 = strappenda(prefix, "\t");
2550
2551         cmd = exec_command_line(c->argv);
2552         fprintf(f,
2553                 "%sCommand Line: %s\n",
2554                 prefix, cmd ? cmd : strerror(ENOMEM));
2555
2556         exec_status_dump(&c->exec_status, f, prefix2);
2557 }
2558
2559 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2560         assert(f);
2561
2562         prefix = strempty(prefix);
2563
2564         LIST_FOREACH(command, c, c)
2565                 exec_command_dump(c, f, prefix);
2566 }
2567
2568 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2569         ExecCommand *end;
2570
2571         assert(l);
2572         assert(e);
2573
2574         if (*l) {
2575                 /* It's kind of important, that we keep the order here */
2576                 LIST_FIND_TAIL(command, *l, end);
2577                 LIST_INSERT_AFTER(command, *l, end, e);
2578         } else
2579               *l = e;
2580 }
2581
2582 int exec_command_set(ExecCommand *c, const char *path, ...) {
2583         va_list ap;
2584         char **l, *p;
2585
2586         assert(c);
2587         assert(path);
2588
2589         va_start(ap, path);
2590         l = strv_new_ap(path, ap);
2591         va_end(ap);
2592
2593         if (!l)
2594                 return -ENOMEM;
2595
2596         p = strdup(path);
2597         if (!p) {
2598                 strv_free(l);
2599                 return -ENOMEM;
2600         }
2601
2602         free(c->path);
2603         c->path = p;
2604
2605         strv_free(c->argv);
2606         c->argv = l;
2607
2608         return 0;
2609 }
2610
2611 int exec_command_append(ExecCommand *c, const char *path, ...) {
2612         _cleanup_strv_free_ char **l = NULL;
2613         va_list ap;
2614         int r;
2615
2616         assert(c);
2617         assert(path);
2618
2619         va_start(ap, path);
2620         l = strv_new_ap(path, ap);
2621         va_end(ap);
2622
2623         if (!l)
2624                 return -ENOMEM;
2625
2626         r = strv_extend_strv(&c->argv, l);
2627         if (r < 0)
2628                 return r;
2629
2630         return 0;
2631 }
2632
2633
2634 static int exec_runtime_allocate(ExecRuntime **rt) {
2635
2636         if (*rt)
2637                 return 0;
2638
2639         *rt = new0(ExecRuntime, 1);
2640         if (!*rt)
2641                 return -ENOMEM;
2642
2643         (*rt)->n_ref = 1;
2644         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2645
2646         return 0;
2647 }
2648
2649 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2650         int r;
2651
2652         assert(rt);
2653         assert(c);
2654         assert(id);
2655
2656         if (*rt)
2657                 return 1;
2658
2659         if (!c->private_network && !c->private_tmp)
2660                 return 0;
2661
2662         r = exec_runtime_allocate(rt);
2663         if (r < 0)
2664                 return r;
2665
2666         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2667                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2668                         return -errno;
2669         }
2670
2671         if (c->private_tmp && !(*rt)->tmp_dir) {
2672                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2673                 if (r < 0)
2674                         return r;
2675         }
2676
2677         return 1;
2678 }
2679
2680 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2681         assert(r);
2682         assert(r->n_ref > 0);
2683
2684         r->n_ref++;
2685         return r;
2686 }
2687
2688 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2689
2690         if (!r)
2691                 return NULL;
2692
2693         assert(r->n_ref > 0);
2694
2695         r->n_ref--;
2696         if (r->n_ref <= 0) {
2697                 free(r->tmp_dir);
2698                 free(r->var_tmp_dir);
2699                 safe_close_pair(r->netns_storage_socket);
2700                 free(r);
2701         }
2702
2703         return NULL;
2704 }
2705
2706 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2707         assert(u);
2708         assert(f);
2709         assert(fds);
2710
2711         if (!rt)
2712                 return 0;
2713
2714         if (rt->tmp_dir)
2715                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2716
2717         if (rt->var_tmp_dir)
2718                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2719
2720         if (rt->netns_storage_socket[0] >= 0) {
2721                 int copy;
2722
2723                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2724                 if (copy < 0)
2725                         return copy;
2726
2727                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2728         }
2729
2730         if (rt->netns_storage_socket[1] >= 0) {
2731                 int copy;
2732
2733                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2734                 if (copy < 0)
2735                         return copy;
2736
2737                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2738         }
2739
2740         return 0;
2741 }
2742
2743 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2744         int r;
2745
2746         assert(rt);
2747         assert(key);
2748         assert(value);
2749
2750         if (streq(key, "tmp-dir")) {
2751                 char *copy;
2752
2753                 r = exec_runtime_allocate(rt);
2754                 if (r < 0)
2755                         return r;
2756
2757                 copy = strdup(value);
2758                 if (!copy)
2759                         return log_oom();
2760
2761                 free((*rt)->tmp_dir);
2762                 (*rt)->tmp_dir = copy;
2763
2764         } else if (streq(key, "var-tmp-dir")) {
2765                 char *copy;
2766
2767                 r = exec_runtime_allocate(rt);
2768                 if (r < 0)
2769                         return r;
2770
2771                 copy = strdup(value);
2772                 if (!copy)
2773                         return log_oom();
2774
2775                 free((*rt)->var_tmp_dir);
2776                 (*rt)->var_tmp_dir = copy;
2777
2778         } else if (streq(key, "netns-socket-0")) {
2779                 int fd;
2780
2781                 r = exec_runtime_allocate(rt);
2782                 if (r < 0)
2783                         return r;
2784
2785                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2786                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2787                 else {
2788                         safe_close((*rt)->netns_storage_socket[0]);
2789                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2790                 }
2791         } else if (streq(key, "netns-socket-1")) {
2792                 int fd;
2793
2794                 r = exec_runtime_allocate(rt);
2795                 if (r < 0)
2796                         return r;
2797
2798                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2799                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2800                 else {
2801                         safe_close((*rt)->netns_storage_socket[1]);
2802                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2803                 }
2804         } else
2805                 return 0;
2806
2807         return 1;
2808 }
2809
/* Thread entry point used by exec_runtime_destroy(): "p" is a heap-allocated path string that is
 * handed to rm_rf_dangerous() and freed afterwards, i.e. this function takes ownership of it.
 * Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2816
2817 void exec_runtime_destroy(ExecRuntime *rt) {
2818         int r;
2819
2820         if (!rt)
2821                 return;
2822
2823         /* If there are multiple users of this, let's leave the stuff around */
2824         if (rt->n_ref > 1)
2825                 return;
2826
2827         if (rt->tmp_dir) {
2828                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2829
2830                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2831                 if (r < 0) {
2832                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2833                         free(rt->tmp_dir);
2834                 }
2835
2836                 rt->tmp_dir = NULL;
2837         }
2838
2839         if (rt->var_tmp_dir) {
2840                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2841
2842                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2843                 if (r < 0) {
2844                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2845                         free(rt->var_tmp_dir);
2846                 }
2847
2848                 rt->var_tmp_dir = NULL;
2849         }
2850
2851         safe_close_pair(rt->netns_storage_socket);
2852 }
2853
2854 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2855         [EXEC_INPUT_NULL] = "null",
2856         [EXEC_INPUT_TTY] = "tty",
2857         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2858         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2859         [EXEC_INPUT_SOCKET] = "socket"
2860 };
2861
2862 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2863
2864 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2865         [EXEC_OUTPUT_INHERIT] = "inherit",
2866         [EXEC_OUTPUT_NULL] = "null",
2867         [EXEC_OUTPUT_TTY] = "tty",
2868         [EXEC_OUTPUT_SYSLOG] = "syslog",
2869         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2870         [EXEC_OUTPUT_KMSG] = "kmsg",
2871         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2872         [EXEC_OUTPUT_JOURNAL] = "journal",
2873         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2874         [EXEC_OUTPUT_SOCKET] = "socket"
2875 };
2876
2877 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);