chiark / gitweb /
core: rearrange code so that libsystemd/sd-bus/ does not include header files from...
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
/* NOTE(review): neither idle timeout is referenced in the visible part
 * of this file; confirm their exact meaning at the use site. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Permission bits chown_terminal() sets on a terminal and then verifies.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Size passed to fd_inc_sndbuf() for the journal stream socket (8 MiB) */
#define SNDBUF_SIZE (8*1024*1024)
102
/* Moves the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. directly behind stdin/stdout/stderr.
 *
 * The array is rewritten in place with the new descriptor numbers.
 * Returns 0 on success or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free number
                         * >= target. If the slot was still taken,
                         * note the first such index so that another
                         * pass starts from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Applies the requested O_NONBLOCK state to each passed fd and clears
 * FD_CLOEXEC on it. Returns 0 on success, or the first negative error
 * reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* These fds are meant to be handed to our children,
                 * hence close-on-exec is always switched off. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_path)
178                 return context->tty_path;
179
180         return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184         assert(context);
185
186         if (context->tty_vhangup)
187                 terminal_vhangup(tty_path(context));
188
189         if (context->tty_reset)
190                 reset_terminal(tty_path(context));
191
192         if (context->tty_vt_disallocate && context->tty_path)
193                 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197         return
198                 o == EXEC_OUTPUT_TTY ||
199                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative
 * errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already landed on the requested number */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
221
222 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
223         int fd, r;
224         union sockaddr_union sa = {
225                 .un.sun_family = AF_UNIX,
226                 .un.sun_path = "/run/systemd/journal/stdout",
227         };
228
229         assert(context);
230         assert(output < _EXEC_OUTPUT_MAX);
231         assert(ident);
232         assert(nfd >= 0);
233
234         fd = socket(AF_UNIX, SOCK_STREAM, 0);
235         if (fd < 0)
236                 return -errno;
237
238         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
239         if (r < 0) {
240                 safe_close(fd);
241                 return -errno;
242         }
243
244         if (shutdown(fd, SHUT_RD) < 0) {
245                 safe_close(fd);
246                 return -errno;
247         }
248
249         fd_inc_sndbuf(fd, SNDBUF_SIZE);
250
251         dprintf(fd,
252                 "%s\n"
253                 "%s\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n"
257                 "%i\n"
258                 "%i\n",
259                 context->syslog_identifier ? context->syslog_identifier : ident,
260                 unit_id,
261                 context->syslog_priority,
262                 !!context->syslog_level_prefix,
263                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
264                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
265                 is_terminal_output(output));
266
267         if (fd != nfd) {
268                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
269                 safe_close(fd);
270         } else
271                 r = nfd;
272
273         return r;
274 }
/* Opens the terminal at path via open_terminal() (adding O_NOCTTY to
 * the passed mode) and makes it available as descriptor nfd. Returns
 * nfd on success, a negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        ret = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        safe_close(term_fd);

        return ret;
}
292
293 static bool is_terminal_input(ExecInput i) {
294         return
295                 i == EXEC_INPUT_TTY ||
296                 i == EXEC_INPUT_TTY_FORCE ||
297                 i == EXEC_INPUT_TTY_FAIL;
298 }
299
300 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
301
302         if (is_terminal_input(std_input) && !apply_tty_stdin)
303                 return EXEC_INPUT_NULL;
304
305         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
306                 return EXEC_INPUT_NULL;
307
308         return std_input;
309 }
310
311 static int fixup_output(ExecOutput std_output, int socket_fd) {
312
313         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
314                 return EXEC_OUTPUT_INHERIT;
315
316         return std_output;
317 }
318
319 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320         ExecInput i;
321
322         assert(context);
323
324         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
325
326         switch (i) {
327
328         case EXEC_INPUT_NULL:
329                 return open_null_as(O_RDONLY, STDIN_FILENO);
330
331         case EXEC_INPUT_TTY:
332         case EXEC_INPUT_TTY_FORCE:
333         case EXEC_INPUT_TTY_FAIL: {
334                 int fd, r;
335
336                 fd = acquire_terminal(tty_path(context),
337                                       i == EXEC_INPUT_TTY_FAIL,
338                                       i == EXEC_INPUT_TTY_FORCE,
339                                       false,
340                                       USEC_INFINITY);
341                 if (fd < 0)
342                         return fd;
343
344                 if (fd != STDIN_FILENO) {
345                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
346                         safe_close(fd);
347                 } else
348                         r = STDIN_FILENO;
349
350                 return r;
351         }
352
353         case EXEC_INPUT_SOCKET:
354                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
355
356         default:
357                 assert_not_reached("Unknown input type");
358         }
359 }
360
361 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
362         ExecOutput o;
363         ExecInput i;
364         int r;
365
366         assert(context);
367         assert(ident);
368
369         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
370         o = fixup_output(context->std_output, socket_fd);
371
372         if (fileno == STDERR_FILENO) {
373                 ExecOutput e;
374                 e = fixup_output(context->std_error, socket_fd);
375
376                 /* This expects the input and output are already set up */
377
378                 /* Don't change the stderr file descriptor if we inherit all
379                  * the way and are not on a tty */
380                 if (e == EXEC_OUTPUT_INHERIT &&
381                     o == EXEC_OUTPUT_INHERIT &&
382                     i == EXEC_INPUT_NULL &&
383                     !is_terminal_input(context->std_input) &&
384                     getppid () != 1)
385                         return fileno;
386
387                 /* Duplicate from stdout if possible */
388                 if (e == o || e == EXEC_OUTPUT_INHERIT)
389                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
390
391                 o = e;
392
393         } else if (o == EXEC_OUTPUT_INHERIT) {
394                 /* If input got downgraded, inherit the original value */
395                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
396                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
397
398                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
399                 if (i != EXEC_INPUT_NULL)
400                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401
402                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
403                 if (getppid() != 1)
404                         return fileno;
405
406                 /* We need to open /dev/null here anew, to get the right access mode. */
407                 return open_null_as(O_WRONLY, fileno);
408         }
409
410         switch (o) {
411
412         case EXEC_OUTPUT_NULL:
413                 return open_null_as(O_WRONLY, fileno);
414
415         case EXEC_OUTPUT_TTY:
416                 if (is_terminal_input(i))
417                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
418
419                 /* We don't reset the terminal if this is just about output */
420                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
421
422         case EXEC_OUTPUT_SYSLOG:
423         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
424         case EXEC_OUTPUT_KMSG:
425         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
426         case EXEC_OUTPUT_JOURNAL:
427         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
428                 r = connect_logger_as(context, o, ident, unit_id, fileno);
429                 if (r < 0) {
430                         log_unit_struct(unit_id,
431                                         LOG_CRIT,
432                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
433                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
434                                                     unit_id, strerror(-r)),
435                                         LOG_ERRNO(-r),
436                                         NULL);
437                         r = open_null_as(O_WRONLY, fileno);
438                 }
439                 return r;
440
441         case EXEC_OUTPUT_SOCKET:
442                 assert(socket_fd >= 0);
443                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
444
445         default:
446                 assert_not_reached("Unknown error type");
447         }
448 }
449
450 static int chown_terminal(int fd, uid_t uid) {
451         struct stat st;
452
453         assert(fd >= 0);
454
455         /* This might fail. What matters are the results. */
456         (void) fchown(fd, uid, -1);
457         (void) fchmod(fd, TTY_MODE);
458
459         if (fstat(fd, &st) < 0)
460                 return -errno;
461
462         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
463                 return -EPERM;
464
465         return 0;
466 }
467
468 static int setup_confirm_stdio(int *_saved_stdin,
469                                int *_saved_stdout) {
470         int fd = -1, saved_stdin, saved_stdout = -1, r;
471
472         assert(_saved_stdin);
473         assert(_saved_stdout);
474
475         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
476         if (saved_stdin < 0)
477                 return -errno;
478
479         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
480         if (saved_stdout < 0) {
481                 r = errno;
482                 goto fail;
483         }
484
485         fd = acquire_terminal(
486                         "/dev/console",
487                         false,
488                         false,
489                         false,
490                         DEFAULT_CONFIRM_USEC);
491         if (fd < 0) {
492                 r = fd;
493                 goto fail;
494         }
495
496         r = chown_terminal(fd, getuid());
497         if (r < 0)
498                 goto fail;
499
500         if (dup2(fd, STDIN_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (dup2(fd, STDOUT_FILENO) < 0) {
506                 r = -errno;
507                 goto fail;
508         }
509
510         if (fd >= 2)
511                 safe_close(fd);
512
513         *_saved_stdin = saved_stdin;
514         *_saved_stdout = saved_stdout;
515
516         return 0;
517
518 fail:
519         safe_close(saved_stdout);
520         safe_close(saved_stdin);
521         safe_close(fd);
522
523         return r;
524 }
525
526 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
527         _cleanup_close_ int fd = -1;
528         va_list ap;
529
530         assert(format);
531
532         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
533         if (fd < 0)
534                 return fd;
535
536         va_start(ap, format);
537         vdprintf(fd, format, ap);
538         va_end(ap);
539
540         return 0;
541 }
542
/* Releases the terminal, puts the saved descriptors back in place as
 * stdin/stdout (where valid) and closes the saved copies. Returns 0,
 * or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
566
567 static int ask_for_confirmation(char *response, char **argv) {
568         int saved_stdout = -1, saved_stdin = -1, r;
569         _cleanup_free_ char *line = NULL;
570
571         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
572         if (r < 0)
573                 return r;
574
575         line = exec_command_line(argv);
576         if (!line)
577                 return -ENOMEM;
578
579         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
580
581         restore_confirm_stdio(&saved_stdin, &saved_stdout);
582
583         return r;
584 }
585
586 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
587         bool keep_groups = false;
588         int r;
589
590         assert(context);
591
592         /* Lookup and set GID and supplementary group list. Here too
593          * we avoid NSS lookups for gid=0. */
594
595         if (context->group || username) {
596
597                 if (context->group) {
598                         const char *g = context->group;
599
600                         if ((r = get_group_creds(&g, &gid)) < 0)
601                                 return r;
602                 }
603
604                 /* First step, initialize groups from /etc/groups */
605                 if (username && gid != 0) {
606                         if (initgroups(username, gid) < 0)
607                                 return -errno;
608
609                         keep_groups = true;
610                 }
611
612                 /* Second step, set our gids */
613                 if (setresgid(gid, gid, gid) < 0)
614                         return -errno;
615         }
616
617         if (context->supplementary_groups) {
618                 int ngroups_max, k;
619                 gid_t *gids;
620                 char **i;
621
622                 /* Final step, initialize any manually set supplementary groups */
623                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
624
625                 if (!(gids = new(gid_t, ngroups_max)))
626                         return -ENOMEM;
627
628                 if (keep_groups) {
629                         if ((k = getgroups(ngroups_max, gids)) < 0) {
630                                 free(gids);
631                                 return -errno;
632                         }
633                 } else
634                         k = 0;
635
636                 STRV_FOREACH(i, context->supplementary_groups) {
637                         const char *g;
638
639                         if (k >= ngroups_max) {
640                                 free(gids);
641                                 return -E2BIG;
642                         }
643
644                         g = *i;
645                         r = get_group_creds(&g, gids+k);
646                         if (r < 0) {
647                                 free(gids);
648                                 return r;
649                         }
650
651                         k++;
652                 }
653
654                 if (setgroups(k, gids) < 0) {
655                         free(gids);
656                         return -errno;
657                 }
658
659                 free(gids);
660         }
661
662         return 0;
663 }
664
665 static int enforce_user(const ExecContext *context, uid_t uid) {
666         assert(context);
667
668         /* Sets (but doesn't lookup) the uid and make sure we keep the
669          * capabilities while doing so. */
670
671         if (context->capabilities) {
672                 _cleanup_cap_free_ cap_t d = NULL;
673                 static const cap_value_t bits[] = {
674                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
675                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
676                 };
677
678                 /* First step: If we need to keep capabilities but
679                  * drop privileges we need to make sure we keep our
680                  * caps, while we drop privileges. */
681                 if (uid != 0) {
682                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
683
684                         if (prctl(PR_GET_SECUREBITS) != sb)
685                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
686                                         return -errno;
687                 }
688
689                 /* Second step: set the capabilities. This will reduce
690                  * the capabilities to the minimum we need. */
691
692                 d = cap_dup(context->capabilities);
693                 if (!d)
694                         return -errno;
695
696                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
697                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
698                         return -errno;
699
700                 if (cap_set_proc(d) < 0)
701                         return -errno;
702         }
703
704         /* Third step: actually set the uids */
705         if (setresuid(uid, uid, uid) < 0)
706                 return -errno;
707
708         /* At this point we should have all necessary capabilities but
709            are otherwise a normal user. However, the caps might got
710            corrupted due to the setresuid() so we need clean them up
711            later. This is done outside of this call. */
712
713         return 0;
714 }
715
716 #ifdef HAVE_PAM
717
/* PAM conversation callback that refuses every request: it always
 * returns PAM_CONV_ERR and does not touch any of its arguments. */
static int null_conv(
                int num_msg,
                const struct pam_message **msg,
                struct pam_response **resp,
                void *appdata_ptr) {

        /* We don't support conversations */

        return PAM_CONV_ERR;
}
728
729 static int setup_pam(
730                 const char *name,
731                 const char *user,
732                 uid_t uid,
733                 const char *tty,
734                 char ***pam_env,
735                 int fds[], unsigned n_fds) {
736
737         static const struct pam_conv conv = {
738                 .conv = null_conv,
739                 .appdata_ptr = NULL
740         };
741
742         pam_handle_t *handle = NULL;
743         sigset_t ss, old_ss;
744         int pam_code = PAM_SUCCESS;
745         int err;
746         char **e = NULL;
747         bool close_session = false;
748         pid_t pam_pid = 0, parent_pid;
749         int flags = 0;
750
751         assert(name);
752         assert(user);
753         assert(pam_env);
754
755         /* We set up PAM in the parent process, then fork. The child
756          * will then stay around until killed via PR_GET_PDEATHSIG or
757          * systemd via the cgroup logic. It will then remove the PAM
758          * session again. The parent process will exec() the actual
759          * daemon. We do things this way to ensure that the main PID
760          * of the daemon is the one we initially fork()ed. */
761
762         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
763                 flags |= PAM_SILENT;
764
765         pam_code = pam_start(name, user, &conv, &handle);
766         if (pam_code != PAM_SUCCESS) {
767                 handle = NULL;
768                 goto fail;
769         }
770
771         if (tty) {
772                 pam_code = pam_set_item(handle, PAM_TTY, tty);
773                 if (pam_code != PAM_SUCCESS)
774                         goto fail;
775         }
776
777         pam_code = pam_acct_mgmt(handle, flags);
778         if (pam_code != PAM_SUCCESS)
779                 goto fail;
780
781         pam_code = pam_open_session(handle, flags);
782         if (pam_code != PAM_SUCCESS)
783                 goto fail;
784
785         close_session = true;
786
787         e = pam_getenvlist(handle);
788         if (!e) {
789                 pam_code = PAM_BUF_ERR;
790                 goto fail;
791         }
792
793         /* Block SIGTERM, so that we know that it won't get lost in
794          * the child */
795         if (sigemptyset(&ss) < 0 ||
796             sigaddset(&ss, SIGTERM) < 0 ||
797             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
798                 goto fail;
799
800         parent_pid = getpid();
801
802         pam_pid = fork();
803         if (pam_pid < 0)
804                 goto fail;
805
806         if (pam_pid == 0) {
807                 int sig;
808                 int r = EXIT_PAM;
809
810                 /* The child's job is to reset the PAM session on
811                  * termination */
812
813                 /* This string must fit in 10 chars (i.e. the length
814                  * of "/sbin/init"), to look pretty in /bin/ps */
815                 rename_process("(sd-pam)");
816
817                 /* Make sure we don't keep open the passed fds in this
818                 child. We assume that otherwise only those fds are
819                 open here that have been opened by PAM. */
820                 close_many(fds, n_fds);
821
822                 /* Drop privileges - we don't need any to pam_close_session
823                  * and this will make PR_SET_PDEATHSIG work in most cases.
824                  * If this fails, ignore the error - but expect sd-pam threads
825                  * to fail to exit normally */
826                 if (setresuid(uid, uid, uid) < 0)
827                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
828
829                 /* Wait until our parent died. This will only work if
830                  * the above setresuid() succeeds, otherwise the kernel
831                  * will not allow unprivileged parents kill their privileged
832                  * children this way. We rely on the control groups kill logic
833                  * to do the rest for us. */
834                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
835                         goto child_finish;
836
837                 /* Check if our parent process might already have
838                  * died? */
839                 if (getppid() == parent_pid) {
840                         for (;;) {
841                                 if (sigwait(&ss, &sig) < 0) {
842                                         if (errno == EINTR)
843                                                 continue;
844
845                                         goto child_finish;
846                                 }
847
848                                 assert(sig == SIGTERM);
849                                 break;
850                         }
851                 }
852
853                 /* If our parent died we'll end the session */
854                 if (getppid() != parent_pid) {
855                         pam_code = pam_close_session(handle, flags);
856                         if (pam_code != PAM_SUCCESS)
857                                 goto child_finish;
858                 }
859
860                 r = 0;
861
862         child_finish:
863                 pam_end(handle, pam_code | flags);
864                 _exit(r);
865         }
866
867         /* If the child was forked off successfully it will do all the
868          * cleanups, so forget about the handle here. */
869         handle = NULL;
870
871         /* Unblock SIGTERM again in the parent */
872         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
873                 goto fail;
874
875         /* We close the log explicitly here, since the PAM modules
876          * might have opened it, but we don't want this fd around. */
877         closelog();
878
879         *pam_env = e;
880         e = NULL;
881
882         return 0;
883
884 fail:
885         if (pam_code != PAM_SUCCESS) {
886                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
887                 err = -EPERM;  /* PAM errors do not map to errno */
888         } else {
889                 log_error_errno(errno, "PAM failed: %m");
890                 err = -errno;
891         }
892
893         if (handle) {
894                 if (close_session)
895                         pam_code = pam_close_session(handle, flags);
896
897                 pam_end(handle, pam_code | flags);
898         }
899
900         strv_free(e);
901
902         closelog();
903
904         if (pam_pid > 1) {
905                 kill(pam_pid, SIGTERM);
906                 kill(pam_pid, SIGCONT);
907         }
908
909         return err;
910 }
911 #endif
912
/* Renames the current process to "(<name>)", where <name> is the last
 * (at most) 8 characters of the basename of path, or to "(...)" if the
 * basename is empty. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *tail;
        size_t n;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name, which tends to be the
                 * more telling part, as the start might just be
                 * "systemd-" */
                tail += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, tail, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
943
944 #ifdef HAVE_SECCOMP
945
946 static int apply_seccomp(const ExecContext *c) {
947         uint32_t negative_action, action;
948         scmp_filter_ctx *seccomp;
949         Iterator i;
950         void *id;
951         int r;
952
953         assert(c);
954
955         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
956
957         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
958         if (!seccomp)
959                 return -ENOMEM;
960
961         if (c->syscall_archs) {
962
963                 SET_FOREACH(id, c->syscall_archs, i) {
964                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
965                         if (r == -EEXIST)
966                                 continue;
967                         if (r < 0)
968                                 goto finish;
969                 }
970
971         } else {
972                 r = seccomp_add_secondary_archs(seccomp);
973                 if (r < 0)
974                         goto finish;
975         }
976
977         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
978         SET_FOREACH(id, c->syscall_filter, i) {
979                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
980                 if (r < 0)
981                         goto finish;
982         }
983
984         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
985         if (r < 0)
986                 goto finish;
987
988         r = seccomp_load(seccomp);
989
990 finish:
991         seccomp_release(seccomp);
992         return r;
993 }
994
995 static int apply_address_families(const ExecContext *c) {
996         scmp_filter_ctx *seccomp;
997         Iterator i;
998         int r;
999
1000         assert(c);
1001
1002         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1003         if (!seccomp)
1004                 return -ENOMEM;
1005
1006         r = seccomp_add_secondary_archs(seccomp);
1007         if (r < 0)
1008                 goto finish;
1009
1010         if (c->address_families_whitelist) {
1011                 int af, first = 0, last = 0;
1012                 void *afp;
1013
1014                 /* If this is a whitelist, we first block the address
1015                  * families that are out of range and then everything
1016                  * that is not in the set. First, we find the lowest
1017                  * and highest address family in the set. */
1018
1019                 SET_FOREACH(afp, c->address_families, i) {
1020                         af = PTR_TO_INT(afp);
1021
1022                         if (af <= 0 || af >= af_max())
1023                                 continue;
1024
1025                         if (first == 0 || af < first)
1026                                 first = af;
1027
1028                         if (last == 0 || af > last)
1029                                 last = af;
1030                 }
1031
1032                 assert((first == 0) == (last == 0));
1033
1034                 if (first == 0) {
1035
1036                         /* No entries in the valid range, block everything */
1037                         r = seccomp_rule_add(
1038                                         seccomp,
1039                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1040                                         SCMP_SYS(socket),
1041                                         0);
1042                         if (r < 0)
1043                                 goto finish;
1044
1045                 } else {
1046
1047                         /* Block everything below the first entry */
1048                         r = seccomp_rule_add(
1049                                         seccomp,
1050                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1051                                         SCMP_SYS(socket),
1052                                         1,
1053                                         SCMP_A0(SCMP_CMP_LT, first));
1054                         if (r < 0)
1055                                 goto finish;
1056
1057                         /* Block everything above the last entry */
1058                         r = seccomp_rule_add(
1059                                         seccomp,
1060                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1061                                         SCMP_SYS(socket),
1062                                         1,
1063                                         SCMP_A0(SCMP_CMP_GT, last));
1064                         if (r < 0)
1065                                 goto finish;
1066
1067                         /* Block everything between the first and last
1068                          * entry */
1069                         for (af = 1; af < af_max(); af++) {
1070
1071                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1072                                         continue;
1073
1074                                 r = seccomp_rule_add(
1075                                                 seccomp,
1076                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1077                                                 SCMP_SYS(socket),
1078                                                 1,
1079                                                 SCMP_A0(SCMP_CMP_EQ, af));
1080                                 if (r < 0)
1081                                         goto finish;
1082                         }
1083                 }
1084
1085         } else {
1086                 void *af;
1087
1088                 /* If this is a blacklist, then generate one rule for
1089                  * each address family that are then combined in OR
1090                  * checks. */
1091
1092                 SET_FOREACH(af, c->address_families, i) {
1093
1094                         r = seccomp_rule_add(
1095                                         seccomp,
1096                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097                                         SCMP_SYS(socket),
1098                                         1,
1099                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1100                         if (r < 0)
1101                                 goto finish;
1102                 }
1103         }
1104
1105         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1106         if (r < 0)
1107                 goto finish;
1108
1109         r = seccomp_load(seccomp);
1110
1111 finish:
1112         seccomp_release(seccomp);
1113         return r;
1114 }
1115
1116 #endif
1117
1118 static void do_idle_pipe_dance(int idle_pipe[4]) {
1119         assert(idle_pipe);
1120
1121
1122         safe_close(idle_pipe[1]);
1123         safe_close(idle_pipe[2]);
1124
1125         if (idle_pipe[0] >= 0) {
1126                 int r;
1127
1128                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1129
1130                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1131                         /* Signal systemd that we are bored and want to continue. */
1132                         write(idle_pipe[3], "x", 1);
1133
1134                         /* Wait for systemd to react to the signal above. */
1135                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1136                 }
1137
1138                 safe_close(idle_pipe[0]);
1139
1140         }
1141
1142         safe_close(idle_pipe[3]);
1143 }
1144
1145 static int build_environment(
1146                 const ExecContext *c,
1147                 unsigned n_fds,
1148                 usec_t watchdog_usec,
1149                 const char *home,
1150                 const char *username,
1151                 const char *shell,
1152                 char ***ret) {
1153
1154         _cleanup_strv_free_ char **our_env = NULL;
1155         unsigned n_env = 0;
1156         char *x;
1157
1158         assert(c);
1159         assert(ret);
1160
1161         our_env = new0(char*, 10);
1162         if (!our_env)
1163                 return -ENOMEM;
1164
1165         if (n_fds > 0) {
1166                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1167                         return -ENOMEM;
1168                 our_env[n_env++] = x;
1169
1170                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1171                         return -ENOMEM;
1172                 our_env[n_env++] = x;
1173         }
1174
1175         if (watchdog_usec > 0) {
1176                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1177                         return -ENOMEM;
1178                 our_env[n_env++] = x;
1179
1180                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1181                         return -ENOMEM;
1182                 our_env[n_env++] = x;
1183         }
1184
1185         if (home) {
1186                 x = strappend("HOME=", home);
1187                 if (!x)
1188                         return -ENOMEM;
1189                 our_env[n_env++] = x;
1190         }
1191
1192         if (username) {
1193                 x = strappend("LOGNAME=", username);
1194                 if (!x)
1195                         return -ENOMEM;
1196                 our_env[n_env++] = x;
1197
1198                 x = strappend("USER=", username);
1199                 if (!x)
1200                         return -ENOMEM;
1201                 our_env[n_env++] = x;
1202         }
1203
1204         if (shell) {
1205                 x = strappend("SHELL=", shell);
1206                 if (!x)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209         }
1210
1211         if (is_terminal_input(c->std_input) ||
1212             c->std_output == EXEC_OUTPUT_TTY ||
1213             c->std_error == EXEC_OUTPUT_TTY ||
1214             c->tty_path) {
1215
1216                 x = strdup(default_term_for_tty(tty_path(c)));
1217                 if (!x)
1218                         return -ENOMEM;
1219                 our_env[n_env++] = x;
1220         }
1221
1222         our_env[n_env++] = NULL;
1223         assert(n_env <= 10);
1224
1225         *ret = our_env;
1226         our_env = NULL;
1227
1228         return 0;
1229 }
1230
1231 static int exec_child(ExecCommand *command,
1232                       const ExecContext *context,
1233                       const ExecParameters *params,
1234                       ExecRuntime *runtime,
1235                       char **argv,
1236                       int socket_fd,
1237                       int *fds, unsigned n_fds,
1238                       char **files_env,
1239                       int *error) {
1240
1241         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1242         _cleanup_free_ char *mac_selinux_context_net = NULL;
1243         const char *username = NULL, *home = NULL, *shell = NULL;
1244         unsigned n_dont_close = 0;
1245         int dont_close[n_fds + 4];
1246         uid_t uid = UID_INVALID;
1247         gid_t gid = GID_INVALID;
1248         int i, err;
1249
1250         assert(command);
1251         assert(context);
1252         assert(params);
1253         assert(error);
1254
1255         rename_process_from_path(command->path);
1256
1257         /* We reset exactly these signals, since they are the
1258          * only ones we set to SIG_IGN in the main daemon. All
1259          * others we leave untouched because we set them to
1260          * SIG_DFL or a valid handler initially, both of which
1261          * will be demoted to SIG_DFL. */
1262         default_signals(SIGNALS_CRASH_HANDLER,
1263                         SIGNALS_IGNORE, -1);
1264
1265         if (context->ignore_sigpipe)
1266                 ignore_signals(SIGPIPE, -1);
1267
1268         err = reset_signal_mask();
1269         if (err < 0) {
1270                 *error = EXIT_SIGNAL_MASK;
1271                 return err;
1272         }
1273
1274         if (params->idle_pipe)
1275                 do_idle_pipe_dance(params->idle_pipe);
1276
1277         /* Close sockets very early to make sure we don't
1278          * block init reexecution because it cannot bind its
1279          * sockets */
1280         log_forget_fds();
1281
1282         if (socket_fd >= 0)
1283                 dont_close[n_dont_close++] = socket_fd;
1284         if (n_fds > 0) {
1285                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1286                 n_dont_close += n_fds;
1287         }
1288         if (params->bus_endpoint_fd >= 0)
1289                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1290         if (runtime) {
1291                 if (runtime->netns_storage_socket[0] >= 0)
1292                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1293                 if (runtime->netns_storage_socket[1] >= 0)
1294                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1295         }
1296
1297         err = close_all_fds(dont_close, n_dont_close);
1298         if (err < 0) {
1299                 *error = EXIT_FDS;
1300                 return err;
1301         }
1302
1303         if (!context->same_pgrp)
1304                 if (setsid() < 0) {
1305                         *error = EXIT_SETSID;
1306                         return -errno;
1307                 }
1308
1309         exec_context_tty_reset(context);
1310
1311         if (params->confirm_spawn) {
1312                 char response;
1313
1314                 err = ask_for_confirmation(&response, argv);
1315                 if (err == -ETIMEDOUT)
1316                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1317                 else if (err < 0)
1318                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1319                 else if (response == 's') {
1320                         write_confirm_message("Skipping execution.\n");
1321                         *error = EXIT_CONFIRM;
1322                         return -ECANCELED;
1323                 } else if (response == 'n') {
1324                         write_confirm_message("Failing execution.\n");
1325                         *error = 0;
1326                         return 0;
1327                 }
1328         }
1329
1330         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1331          * must sure to drop O_NONBLOCK */
1332         if (socket_fd >= 0)
1333                 fd_nonblock(socket_fd, false);
1334
1335         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1336         if (err < 0) {
1337                 *error = EXIT_STDIN;
1338                 return err;
1339         }
1340
1341         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1342         if (err < 0) {
1343                 *error = EXIT_STDOUT;
1344                 return err;
1345         }
1346
1347         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1348         if (err < 0) {
1349                 *error = EXIT_STDERR;
1350                 return err;
1351         }
1352
1353         if (params->cgroup_path) {
1354                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1355                 if (err < 0) {
1356                         *error = EXIT_CGROUP;
1357                         return err;
1358                 }
1359         }
1360
1361         if (context->oom_score_adjust_set) {
1362                 char t[16];
1363
1364                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1365                 char_array_0(t);
1366
1367                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1368                         *error = EXIT_OOM_ADJUST;
1369                         return -errno;
1370                 }
1371         }
1372
1373         if (context->nice_set)
1374                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1375                         *error = EXIT_NICE;
1376                         return -errno;
1377                 }
1378
1379         if (context->cpu_sched_set) {
1380                 struct sched_param param = {
1381                         .sched_priority = context->cpu_sched_priority,
1382                 };
1383
1384                 err = sched_setscheduler(0,
1385                                          context->cpu_sched_policy |
1386                                          (context->cpu_sched_reset_on_fork ?
1387                                           SCHED_RESET_ON_FORK : 0),
1388                                          &param);
1389                 if (err < 0) {
1390                         *error = EXIT_SETSCHEDULER;
1391                         return -errno;
1392                 }
1393         }
1394
1395         if (context->cpuset)
1396                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1397                         *error = EXIT_CPUAFFINITY;
1398                         return -errno;
1399                 }
1400
1401         if (context->ioprio_set)
1402                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1403                         *error = EXIT_IOPRIO;
1404                         return -errno;
1405                 }
1406
1407         if (context->timer_slack_nsec != NSEC_INFINITY)
1408                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1409                         *error = EXIT_TIMERSLACK;
1410                         return -errno;
1411                 }
1412
1413         if (context->personality != 0xffffffffUL)
1414                 if (personality(context->personality) < 0) {
1415                         *error = EXIT_PERSONALITY;
1416                         return -errno;
1417                 }
1418
1419         if (context->utmp_id)
1420                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1421
1422         if (context->user) {
1423                 username = context->user;
1424                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1425                 if (err < 0) {
1426                         *error = EXIT_USER;
1427                         return err;
1428                 }
1429
1430                 if (is_terminal_input(context->std_input)) {
1431                         err = chown_terminal(STDIN_FILENO, uid);
1432                         if (err < 0) {
1433                                 *error = EXIT_STDIN;
1434                                 return err;
1435                         }
1436                 }
1437         }
1438
1439 #ifdef ENABLE_KDBUS
1440         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1441                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1442
1443                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1444                 if (err < 0) {
1445                         *error = EXIT_BUS_ENDPOINT;
1446                         return err;
1447                 }
1448         }
1449 #endif
1450
1451         /* If delegation is enabled we'll pass ownership of the cgroup
1452          * (but only in systemd's own controller hierarchy!) to the
1453          * user of the new process. */
1454         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1455                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1456                 if (err < 0) {
1457                         *error = EXIT_CGROUP;
1458                         return err;
1459                 }
1460
1461
1462                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1463                 if (err < 0) {
1464                         *error = EXIT_CGROUP;
1465                         return err;
1466                 }
1467         }
1468
1469         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1470                 char **rt;
1471
1472                 STRV_FOREACH(rt, context->runtime_directory) {
1473                         _cleanup_free_ char *p;
1474
1475                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1476                         if (!p) {
1477                                 *error = EXIT_RUNTIME_DIRECTORY;
1478                                 return -ENOMEM;
1479                         }
1480
1481                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1482                         if (err < 0) {
1483                                 *error = EXIT_RUNTIME_DIRECTORY;
1484                                 return err;
1485                         }
1486                 }
1487         }
1488
1489         if (params->apply_permissions) {
1490                 err = enforce_groups(context, username, gid);
1491                 if (err < 0) {
1492                         *error = EXIT_GROUP;
1493                         return err;
1494                 }
1495         }
1496
1497         umask(context->umask);
1498
1499 #ifdef HAVE_PAM
1500         if (params->apply_permissions && context->pam_name && username) {
1501                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1502                 if (err < 0) {
1503                         *error = EXIT_PAM;
1504                         return err;
1505                 }
1506         }
1507 #endif
1508
1509         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1510                 err = setup_netns(runtime->netns_storage_socket);
1511                 if (err < 0) {
1512                         *error = EXIT_NETWORK;
1513                         return err;
1514                 }
1515         }
1516
1517         if (!strv_isempty(context->read_write_dirs) ||
1518             !strv_isempty(context->read_only_dirs) ||
1519             !strv_isempty(context->inaccessible_dirs) ||
1520             context->mount_flags != 0 ||
1521             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1522             params->bus_endpoint_path ||
1523             context->private_devices ||
1524             context->protect_system != PROTECT_SYSTEM_NO ||
1525             context->protect_home != PROTECT_HOME_NO) {
1526
1527                 char *tmp = NULL, *var = NULL;
1528
1529                 /* The runtime struct only contains the parent
1530                  * of the private /tmp, which is
1531                  * non-accessible to world users. Inside of it
1532                  * there's a /tmp that is sticky, and that's
1533                  * the one we want to use here. */
1534
1535                 if (context->private_tmp && runtime) {
1536                         if (runtime->tmp_dir)
1537                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1538                         if (runtime->var_tmp_dir)
1539                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1540                 }
1541
1542                 err = setup_namespace(
1543                                 context->read_write_dirs,
1544                                 context->read_only_dirs,
1545                                 context->inaccessible_dirs,
1546                                 tmp,
1547                                 var,
1548                                 params->bus_endpoint_path,
1549                                 context->private_devices,
1550                                 context->protect_home,
1551                                 context->protect_system,
1552                                 context->mount_flags);
1553
1554                 if (err == -EPERM)
1555                         log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1556                 else if (err < 0) {
1557                         *error = EXIT_NAMESPACE;
1558                         return err;
1559                 }
1560         }
1561
1562         if (params->apply_chroot) {
1563                 if (context->root_directory)
1564                         if (chroot(context->root_directory) < 0) {
1565                                 *error = EXIT_CHROOT;
1566                                 return -errno;
1567                         }
1568
1569                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1570                         *error = EXIT_CHDIR;
1571                         return -errno;
1572                 }
1573         } else {
1574                 _cleanup_free_ char *d = NULL;
1575
1576                 if (asprintf(&d, "%s/%s",
1577                              context->root_directory ? context->root_directory : "",
1578                              context->working_directory ? context->working_directory : "") < 0) {
1579                         *error = EXIT_MEMORY;
1580                         return -ENOMEM;
1581                 }
1582
1583                 if (chdir(d) < 0) {
1584                         *error = EXIT_CHDIR;
1585                         return -errno;
1586                 }
1587         }
1588
1589 #ifdef HAVE_SELINUX
1590         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1591                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1592                 if (err < 0) {
1593                         *error = EXIT_SELINUX_CONTEXT;
1594                         return err;
1595                 }
1596         }
1597 #endif
1598
1599         /* We repeat the fd closing here, to make sure that
1600          * nothing is leaked from the PAM modules. Note that
1601          * we are more aggressive this time since socket_fd
1602          * and the netns fds we don't need anymore. The custom
1603          * endpoint fd was needed to upload the policy and can
1604          * now be closed as well. */
1605         err = close_all_fds(fds, n_fds);
1606         if (err >= 0)
1607                 err = shift_fds(fds, n_fds);
1608         if (err >= 0)
1609                 err = flags_fds(fds, n_fds, context->non_blocking);
1610         if (err < 0) {
1611                 *error = EXIT_FDS;
1612                 return err;
1613         }
1614
1615         if (params->apply_permissions) {
1616
1617                 for (i = 0; i < _RLIMIT_MAX; i++) {
1618                         if (!context->rlimit[i])
1619                                 continue;
1620
1621                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1622                                 *error = EXIT_LIMITS;
1623                                 return -errno;
1624                         }
1625                 }
1626
1627                 if (context->capability_bounding_set_drop) {
1628                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1629                         if (err < 0) {
1630                                 *error = EXIT_CAPABILITIES;
1631                                 return err;
1632                         }
1633                 }
1634
1635 #ifdef HAVE_SMACK
1636                 if (context->smack_process_label) {
1637                         err = mac_smack_apply_pid(0, context->smack_process_label);
1638                         if (err < 0) {
1639                                 *error = EXIT_SMACK_PROCESS_LABEL;
1640                                 return err;
1641                         }
1642                 }
1643 #endif
1644
1645                 if (context->user) {
1646                         err = enforce_user(context, uid);
1647                         if (err < 0) {
1648                                 *error = EXIT_USER;
1649                                 return err;
1650                         }
1651                 }
1652
1653                 /* PR_GET_SECUREBITS is not privileged, while
1654                  * PR_SET_SECUREBITS is. So to suppress
1655                  * potential EPERMs we'll try not to call
1656                  * PR_SET_SECUREBITS unless necessary. */
1657                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1658                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1659                                 *error = EXIT_SECUREBITS;
1660                                 return -errno;
1661                         }
1662
1663                 if (context->capabilities)
1664                         if (cap_set_proc(context->capabilities) < 0) {
1665                                 *error = EXIT_CAPABILITIES;
1666                                 return -errno;
1667                         }
1668
1669                 if (context->no_new_privileges)
1670                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1671                                 *error = EXIT_NO_NEW_PRIVILEGES;
1672                                 return -errno;
1673                         }
1674
1675 #ifdef HAVE_SECCOMP
1676                 if (context->address_families_whitelist ||
1677                     !set_isempty(context->address_families)) {
1678                         err = apply_address_families(context);
1679                         if (err < 0) {
1680                                 *error = EXIT_ADDRESS_FAMILIES;
1681                                 return err;
1682                         }
1683                 }
1684
1685                 if (context->syscall_whitelist ||
1686                     !set_isempty(context->syscall_filter) ||
1687                     !set_isempty(context->syscall_archs)) {
1688                         err = apply_seccomp(context);
1689                         if (err < 0) {
1690                                 *error = EXIT_SECCOMP;
1691                                 return err;
1692                         }
1693                 }
1694 #endif
1695
1696 #ifdef HAVE_SELINUX
1697                 if (mac_selinux_use()) {
1698                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1699
1700                         if (exec_context) {
1701                                 err = setexeccon(exec_context);
1702                                 if (err < 0) {
1703                                         *error = EXIT_SELINUX_CONTEXT;
1704                                         return err;
1705                                 }
1706                         }
1707                 }
1708 #endif
1709
1710 #ifdef HAVE_APPARMOR
1711                 if (context->apparmor_profile && mac_apparmor_use()) {
1712                         err = aa_change_onexec(context->apparmor_profile);
1713                         if (err < 0 && !context->apparmor_profile_ignore) {
1714                                 *error = EXIT_APPARMOR_PROFILE;
1715                                 return -errno;
1716                         }
1717                 }
1718 #endif
1719         }
1720
1721         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1722         if (err < 0) {
1723                 *error = EXIT_MEMORY;
1724                 return err;
1725         }
1726
1727         final_env = strv_env_merge(5,
1728                                    params->environment,
1729                                    our_env,
1730                                    context->environment,
1731                                    files_env,
1732                                    pam_env,
1733                                    NULL);
1734         if (!final_env) {
1735                 *error = EXIT_MEMORY;
1736                 return -ENOMEM;
1737         }
1738
1739         final_argv = replace_env_argv(argv, final_env);
1740         if (!final_argv) {
1741                 *error = EXIT_MEMORY;
1742                 return -ENOMEM;
1743         }
1744
1745         final_env = strv_env_clean(final_env);
1746
1747         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1748                 _cleanup_free_ char *line;
1749
1750                 line = exec_command_line(final_argv);
1751                 if (line) {
1752                         log_open();
1753                         log_unit_struct(params->unit_id,
1754                                         LOG_DEBUG,
1755                                         "EXECUTABLE=%s", command->path,
1756                                         LOG_MESSAGE("Executing: %s", line),
1757                                         NULL);
1758                         log_close();
1759                 }
1760         }
1761         execve(command->path, final_argv, final_env);
1762         *error = EXIT_EXEC;
1763         return -errno;
1764 }
1765
1766 int exec_spawn(ExecCommand *command,
1767                const ExecContext *context,
1768                const ExecParameters *params,
1769                ExecRuntime *runtime,
1770                pid_t *ret) {
1771
1772         _cleanup_strv_free_ char **files_env = NULL;
1773         int *fds = NULL; unsigned n_fds = 0;
1774         char *line, **argv;
1775         int socket_fd;
1776         pid_t pid;
1777         int err;
1778
1779         assert(command);
1780         assert(context);
1781         assert(ret);
1782         assert(params);
1783         assert(params->fds || params->n_fds <= 0);
1784
1785         if (context->std_input == EXEC_INPUT_SOCKET ||
1786             context->std_output == EXEC_OUTPUT_SOCKET ||
1787             context->std_error == EXEC_OUTPUT_SOCKET) {
1788
1789                 if (params->n_fds != 1)
1790                         return -EINVAL;
1791
1792                 socket_fd = params->fds[0];
1793         } else {
1794                 socket_fd = -1;
1795                 fds = params->fds;
1796                 n_fds = params->n_fds;
1797         }
1798
1799         err = exec_context_load_environment(context, params->unit_id, &files_env);
1800         if (err < 0) {
1801                 log_unit_struct(params->unit_id,
1802                                 LOG_ERR,
1803                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1804                                 LOG_ERRNO(-err),
1805                                 NULL);
1806                 return err;
1807         }
1808
1809         argv = params->argv ?: command->argv;
1810
1811         line = exec_command_line(argv);
1812         if (!line)
1813                 return log_oom();
1814
1815         log_unit_struct(params->unit_id,
1816                         LOG_DEBUG,
1817                         "EXECUTABLE=%s", command->path,
1818                         LOG_MESSAGE("About to execute: %s", line),
1819                         NULL);
1820         free(line);
1821
1822         pid = fork();
1823         if (pid < 0)
1824                 return -errno;
1825
1826         if (pid == 0) {
1827                 int r;
1828
1829                 err = exec_child(command,
1830                                  context,
1831                                  params,
1832                                  runtime,
1833                                  argv,
1834                                  socket_fd,
1835                                  fds, n_fds,
1836                                  files_env,
1837                                  &r);
1838                 if (r != 0) {
1839                         log_open();
1840                         log_struct(LOG_ERR,
1841                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842                                    "EXECUTABLE=%s", command->path,
1843                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1844                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845                                                command->path, strerror(-err)),
1846                                    LOG_ERRNO(-err),
1847                                    NULL);
1848                         log_close();
1849                 }
1850
1851                 _exit(r);
1852         }
1853
1854         log_unit_struct(params->unit_id,
1855                         LOG_DEBUG,
1856                         LOG_MESSAGE("Forked %s as "PID_FMT,
1857                                     command->path, pid),
1858                         NULL);
1859
1860         /* We add the new process to the cgroup both in the child (so
1861          * that we can be sure that no user code is ever executed
1862          * outside of the cgroup) and in the parent (so that we can be
1863          * sure that when we kill the cgroup the process will be
1864          * killed too). */
1865         if (params->cgroup_path)
1866                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1867
1868         exec_status_start(&command->exec_status, pid);
1869
1870         *ret = pid;
1871         return 0;
1872 }
1873
1874 void exec_context_init(ExecContext *c) {
1875         assert(c);
1876
1877         c->umask = 0022;
1878         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879         c->cpu_sched_policy = SCHED_OTHER;
1880         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881         c->syslog_level_prefix = true;
1882         c->ignore_sigpipe = true;
1883         c->timer_slack_nsec = NSEC_INFINITY;
1884         c->personality = 0xffffffffUL;
1885         c->runtime_directory_mode = 0755;
1886 }
1887
1888 void exec_context_done(ExecContext *c) {
1889         unsigned l;
1890
1891         assert(c);
1892
1893         strv_free(c->environment);
1894         c->environment = NULL;
1895
1896         strv_free(c->environment_files);
1897         c->environment_files = NULL;
1898
1899         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1900                 free(c->rlimit[l]);
1901                 c->rlimit[l] = NULL;
1902         }
1903
1904         free(c->working_directory);
1905         c->working_directory = NULL;
1906         free(c->root_directory);
1907         c->root_directory = NULL;
1908
1909         free(c->tty_path);
1910         c->tty_path = NULL;
1911
1912         free(c->syslog_identifier);
1913         c->syslog_identifier = NULL;
1914
1915         free(c->user);
1916         c->user = NULL;
1917
1918         free(c->group);
1919         c->group = NULL;
1920
1921         strv_free(c->supplementary_groups);
1922         c->supplementary_groups = NULL;
1923
1924         free(c->pam_name);
1925         c->pam_name = NULL;
1926
1927         if (c->capabilities) {
1928                 cap_free(c->capabilities);
1929                 c->capabilities = NULL;
1930         }
1931
1932         strv_free(c->read_only_dirs);
1933         c->read_only_dirs = NULL;
1934
1935         strv_free(c->read_write_dirs);
1936         c->read_write_dirs = NULL;
1937
1938         strv_free(c->inaccessible_dirs);
1939         c->inaccessible_dirs = NULL;
1940
1941         if (c->cpuset)
1942                 CPU_FREE(c->cpuset);
1943
1944         free(c->utmp_id);
1945         c->utmp_id = NULL;
1946
1947         free(c->selinux_context);
1948         c->selinux_context = NULL;
1949
1950         free(c->apparmor_profile);
1951         c->apparmor_profile = NULL;
1952
1953         set_free(c->syscall_filter);
1954         c->syscall_filter = NULL;
1955
1956         set_free(c->syscall_archs);
1957         c->syscall_archs = NULL;
1958
1959         set_free(c->address_families);
1960         c->address_families = NULL;
1961
1962         strv_free(c->runtime_directory);
1963         c->runtime_directory = NULL;
1964
1965         bus_endpoint_free(c->bus_endpoint);
1966         c->bus_endpoint = NULL;
1967 }
1968
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1970         char **i;
1971
1972         assert(c);
1973
1974         if (!runtime_prefix)
1975                 return 0;
1976
1977         STRV_FOREACH(i, c->runtime_directory) {
1978                 _cleanup_free_ char *p;
1979
1980                 p = strjoin(runtime_prefix, "/", *i, NULL);
1981                 if (!p)
1982                         return -ENOMEM;
1983
1984                 /* We execute this synchronously, since we need to be
1985                  * sure this is gone when we start the service
1986                  * next. */
1987                 rm_rf(p, false, true, false);
1988         }
1989
1990         return 0;
1991 }
1992
1993 void exec_command_done(ExecCommand *c) {
1994         assert(c);
1995
1996         free(c->path);
1997         c->path = NULL;
1998
1999         strv_free(c->argv);
2000         c->argv = NULL;
2001 }
2002
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2004         unsigned i;
2005
2006         for (i = 0; i < n; i++)
2007                 exec_command_done(c+i);
2008 }
2009
2010 ExecCommand* exec_command_free_list(ExecCommand *c) {
2011         ExecCommand *i;
2012
2013         while ((i = c)) {
2014                 LIST_REMOVE(command, c, i);
2015                 exec_command_done(i);
2016                 free(i);
2017         }
2018
2019         return NULL;
2020 }
2021
2022 void exec_command_free_array(ExecCommand **c, unsigned n) {
2023         unsigned i;
2024
2025         for (i = 0; i < n; i++)
2026                 c[i] = exec_command_free_list(c[i]);
2027 }
2028
2029 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2030         char **i, **r = NULL;
2031
2032         assert(c);
2033         assert(l);
2034
2035         STRV_FOREACH(i, c->environment_files) {
2036                 char *fn;
2037                 int k;
2038                 bool ignore = false;
2039                 char **p;
2040                 _cleanup_globfree_ glob_t pglob = {};
2041                 int count, n;
2042
2043                 fn = *i;
2044
2045                 if (fn[0] == '-') {
2046                         ignore = true;
2047                         fn ++;
2048                 }
2049
2050                 if (!path_is_absolute(fn)) {
2051                         if (ignore)
2052                                 continue;
2053
2054                         strv_free(r);
2055                         return -EINVAL;
2056                 }
2057
2058                 /* Filename supports globbing, take all matching files */
2059                 errno = 0;
2060                 if (glob(fn, 0, NULL, &pglob) != 0) {
2061                         if (ignore)
2062                                 continue;
2063
2064                         strv_free(r);
2065                         return errno ? -errno : -EINVAL;
2066                 }
2067                 count = pglob.gl_pathc;
2068                 if (count == 0) {
2069                         if (ignore)
2070                                 continue;
2071
2072                         strv_free(r);
2073                         return -EINVAL;
2074                 }
2075                 for (n = 0; n < count; n++) {
2076                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2077                         if (k < 0) {
2078                                 if (ignore)
2079                                         continue;
2080
2081                                 strv_free(r);
2082                                 return k;
2083                         }
2084                         /* Log invalid environment variables with filename */
2085                         if (p)
2086                                 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2087
2088                         if (r == NULL)
2089                                 r = p;
2090                         else {
2091                                 char **m;
2092
2093                                 m = strv_env_merge(2, r, p);
2094                                 strv_free(r);
2095                                 strv_free(p);
2096                                 if (!m)
2097                                         return -ENOMEM;
2098
2099                                 r = m;
2100                         }
2101                 }
2102         }
2103
2104         *l = r;
2105
2106         return 0;
2107 }
2108
2109 static bool tty_may_match_dev_console(const char *tty) {
2110         _cleanup_free_ char *active = NULL;
2111        char *console;
2112
2113         if (startswith(tty, "/dev/"))
2114                 tty += 5;
2115
2116         /* trivial identity? */
2117         if (streq(tty, "console"))
2118                 return true;
2119
2120         console = resolve_dev_console(&active);
2121         /* if we could not resolve, assume it may */
2122         if (!console)
2123                 return true;
2124
2125         /* "tty0" means the active VC, so it may be the same sometimes */
2126         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2127 }
2128
2129 bool exec_context_may_touch_console(ExecContext *ec) {
2130         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2131                 is_terminal_input(ec->std_input) ||
2132                 is_terminal_output(ec->std_output) ||
2133                 is_terminal_output(ec->std_error)) &&
2134                tty_may_match_dev_console(tty_path(ec));
2135 }
2136
2137 static void strv_fprintf(FILE *f, char **l) {
2138         char **g;
2139
2140         assert(f);
2141
2142         STRV_FOREACH(g, l)
2143                 fprintf(f, " %s", *g);
2144 }
2145
2146 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2147         char **e;
2148         unsigned i;
2149
2150         assert(c);
2151         assert(f);
2152
2153         prefix = strempty(prefix);
2154
2155         fprintf(f,
2156                 "%sUMask: %04o\n"
2157                 "%sWorkingDirectory: %s\n"
2158                 "%sRootDirectory: %s\n"
2159                 "%sNonBlocking: %s\n"
2160                 "%sPrivateTmp: %s\n"
2161                 "%sPrivateNetwork: %s\n"
2162                 "%sPrivateDevices: %s\n"
2163                 "%sProtectHome: %s\n"
2164                 "%sProtectSystem: %s\n"
2165                 "%sIgnoreSIGPIPE: %s\n",
2166                 prefix, c->umask,
2167                 prefix, c->working_directory ? c->working_directory : "/",
2168                 prefix, c->root_directory ? c->root_directory : "/",
2169                 prefix, yes_no(c->non_blocking),
2170                 prefix, yes_no(c->private_tmp),
2171                 prefix, yes_no(c->private_network),
2172                 prefix, yes_no(c->private_devices),
2173                 prefix, protect_home_to_string(c->protect_home),
2174                 prefix, protect_system_to_string(c->protect_system),
2175                 prefix, yes_no(c->ignore_sigpipe));
2176
2177         STRV_FOREACH(e, c->environment)
2178                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2179
2180         STRV_FOREACH(e, c->environment_files)
2181                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2182
2183         if (c->nice_set)
2184                 fprintf(f,
2185                         "%sNice: %i\n",
2186                         prefix, c->nice);
2187
2188         if (c->oom_score_adjust_set)
2189                 fprintf(f,
2190                         "%sOOMScoreAdjust: %i\n",
2191                         prefix, c->oom_score_adjust);
2192
2193         for (i = 0; i < RLIM_NLIMITS; i++)
2194                 if (c->rlimit[i])
2195                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2196                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2197
2198         if (c->ioprio_set) {
2199                 _cleanup_free_ char *class_str = NULL;
2200
2201                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2202                 fprintf(f,
2203                         "%sIOSchedulingClass: %s\n"
2204                         "%sIOPriority: %i\n",
2205                         prefix, strna(class_str),
2206                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2207         }
2208
2209         if (c->cpu_sched_set) {
2210                 _cleanup_free_ char *policy_str = NULL;
2211
2212                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2213                 fprintf(f,
2214                         "%sCPUSchedulingPolicy: %s\n"
2215                         "%sCPUSchedulingPriority: %i\n"
2216                         "%sCPUSchedulingResetOnFork: %s\n",
2217                         prefix, strna(policy_str),
2218                         prefix, c->cpu_sched_priority,
2219                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2220         }
2221
2222         if (c->cpuset) {
2223                 fprintf(f, "%sCPUAffinity:", prefix);
2224                 for (i = 0; i < c->cpuset_ncpus; i++)
2225                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2226                                 fprintf(f, " %u", i);
2227                 fputs("\n", f);
2228         }
2229
2230         if (c->timer_slack_nsec != NSEC_INFINITY)
2231                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2232
2233         fprintf(f,
2234                 "%sStandardInput: %s\n"
2235                 "%sStandardOutput: %s\n"
2236                 "%sStandardError: %s\n",
2237                 prefix, exec_input_to_string(c->std_input),
2238                 prefix, exec_output_to_string(c->std_output),
2239                 prefix, exec_output_to_string(c->std_error));
2240
2241         if (c->tty_path)
2242                 fprintf(f,
2243                         "%sTTYPath: %s\n"
2244                         "%sTTYReset: %s\n"
2245                         "%sTTYVHangup: %s\n"
2246                         "%sTTYVTDisallocate: %s\n",
2247                         prefix, c->tty_path,
2248                         prefix, yes_no(c->tty_reset),
2249                         prefix, yes_no(c->tty_vhangup),
2250                         prefix, yes_no(c->tty_vt_disallocate));
2251
2252         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2253             c->std_output == EXEC_OUTPUT_KMSG ||
2254             c->std_output == EXEC_OUTPUT_JOURNAL ||
2255             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2256             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2257             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2258             c->std_error == EXEC_OUTPUT_SYSLOG ||
2259             c->std_error == EXEC_OUTPUT_KMSG ||
2260             c->std_error == EXEC_OUTPUT_JOURNAL ||
2261             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2262             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2263             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2264
2265                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2266
2267                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2268                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2269
2270                 fprintf(f,
2271                         "%sSyslogFacility: %s\n"
2272                         "%sSyslogLevel: %s\n",
2273                         prefix, strna(fac_str),
2274                         prefix, strna(lvl_str));
2275         }
2276
2277         if (c->capabilities) {
2278                 _cleanup_cap_free_charp_ char *t;
2279
2280                 t = cap_to_text(c->capabilities, NULL);
2281                 if (t)
2282                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2283         }
2284
2285         if (c->secure_bits)
2286                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2287                         prefix,
2288                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2289                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2290                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2291                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2292                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2293                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2294
2295         if (c->capability_bounding_set_drop) {
2296                 unsigned long l;
2297                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2298
2299                 for (l = 0; l <= cap_last_cap(); l++)
2300                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2301                                 fprintf(f, " %s", strna(capability_to_name(l)));
2302
2303                 fputs("\n", f);
2304         }
2305
2306         if (c->user)
2307                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2308         if (c->group)
2309                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2310
2311         if (strv_length(c->supplementary_groups) > 0) {
2312                 fprintf(f, "%sSupplementaryGroups:", prefix);
2313                 strv_fprintf(f, c->supplementary_groups);
2314                 fputs("\n", f);
2315         }
2316
2317         if (c->pam_name)
2318                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2319
2320         if (strv_length(c->read_write_dirs) > 0) {
2321                 fprintf(f, "%sReadWriteDirs:", prefix);
2322                 strv_fprintf(f, c->read_write_dirs);
2323                 fputs("\n", f);
2324         }
2325
2326         if (strv_length(c->read_only_dirs) > 0) {
2327                 fprintf(f, "%sReadOnlyDirs:", prefix);
2328                 strv_fprintf(f, c->read_only_dirs);
2329                 fputs("\n", f);
2330         }
2331
2332         if (strv_length(c->inaccessible_dirs) > 0) {
2333                 fprintf(f, "%sInaccessibleDirs:", prefix);
2334                 strv_fprintf(f, c->inaccessible_dirs);
2335                 fputs("\n", f);
2336         }
2337
2338         if (c->utmp_id)
2339                 fprintf(f,
2340                         "%sUtmpIdentifier: %s\n",
2341                         prefix, c->utmp_id);
2342
2343         if (c->selinux_context)
2344                 fprintf(f,
2345                         "%sSELinuxContext: %s%s\n",
2346                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2347
2348         if (c->personality != 0xffffffffUL)
2349                 fprintf(f,
2350                         "%sPersonality: %s\n",
2351                         prefix, strna(personality_to_string(c->personality)));
2352
2353         if (c->syscall_filter) {
2354 #ifdef HAVE_SECCOMP
2355                 Iterator j;
2356                 void *id;
2357                 bool first = true;
2358 #endif
2359
2360                 fprintf(f,
2361                         "%sSystemCallFilter: ",
2362                         prefix);
2363
2364                 if (!c->syscall_whitelist)
2365                         fputc('~', f);
2366
2367 #ifdef HAVE_SECCOMP
2368                 SET_FOREACH(id, c->syscall_filter, j) {
2369                         _cleanup_free_ char *name = NULL;
2370
2371                         if (first)
2372                                 first = false;
2373                         else
2374                                 fputc(' ', f);
2375
2376                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2377                         fputs(strna(name), f);
2378                 }
2379 #endif
2380
2381                 fputc('\n', f);
2382         }
2383
2384         if (c->syscall_archs) {
2385 #ifdef HAVE_SECCOMP
2386                 Iterator j;
2387                 void *id;
2388 #endif
2389
2390                 fprintf(f,
2391                         "%sSystemCallArchitectures:",
2392                         prefix);
2393
2394 #ifdef HAVE_SECCOMP
2395                 SET_FOREACH(id, c->syscall_archs, j)
2396                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2397 #endif
2398                 fputc('\n', f);
2399         }
2400
2401         if (c->syscall_errno != 0)
2402                 fprintf(f,
2403                         "%sSystemCallErrorNumber: %s\n",
2404                         prefix, strna(errno_to_name(c->syscall_errno)));
2405
2406         if (c->apparmor_profile)
2407                 fprintf(f,
2408                         "%sAppArmorProfile: %s%s\n",
2409                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2410 }
2411
2412 bool exec_context_maintains_privileges(ExecContext *c) {
2413         assert(c);
2414
2415         /* Returns true if the process forked off would run run under
2416          * an unchanged UID or as root. */
2417
2418         if (!c->user)
2419                 return true;
2420
2421         if (streq(c->user, "root") || streq(c->user, "0"))
2422                 return true;
2423
2424         return false;
2425 }
2426
2427 void exec_status_start(ExecStatus *s, pid_t pid) {
2428         assert(s);
2429
2430         zero(*s);
2431         s->pid = pid;
2432         dual_timestamp_get(&s->start_timestamp);
2433 }
2434
2435 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2436         assert(s);
2437
2438         if (s->pid && s->pid != pid)
2439                 zero(*s);
2440
2441         s->pid = pid;
2442         dual_timestamp_get(&s->exit_timestamp);
2443
2444         s->code = code;
2445         s->status = status;
2446
2447         if (context) {
2448                 if (context->utmp_id)
2449                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2450
2451                 exec_context_tty_reset(context);
2452         }
2453 }
2454
2455 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2456         char buf[FORMAT_TIMESTAMP_MAX];
2457
2458         assert(s);
2459         assert(f);
2460
2461         if (s->pid <= 0)
2462                 return;
2463
2464         prefix = strempty(prefix);
2465
2466         fprintf(f,
2467                 "%sPID: "PID_FMT"\n",
2468                 prefix, s->pid);
2469
2470         if (s->start_timestamp.realtime > 0)
2471                 fprintf(f,
2472                         "%sStart Timestamp: %s\n",
2473                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2474
2475         if (s->exit_timestamp.realtime > 0)
2476                 fprintf(f,
2477                         "%sExit Timestamp: %s\n"
2478                         "%sExit Code: %s\n"
2479                         "%sExit Status: %i\n",
2480                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2481                         prefix, sigchld_code_to_string(s->code),
2482                         prefix, s->status);
2483 }
2484
2485 char *exec_command_line(char **argv) {
2486         size_t k;
2487         char *n, *p, **a;
2488         bool first = true;
2489
2490         assert(argv);
2491
2492         k = 1;
2493         STRV_FOREACH(a, argv)
2494                 k += strlen(*a)+3;
2495
2496         if (!(n = new(char, k)))
2497                 return NULL;
2498
2499         p = n;
2500         STRV_FOREACH(a, argv) {
2501
2502                 if (!first)
2503                         *(p++) = ' ';
2504                 else
2505                         first = false;
2506
2507                 if (strpbrk(*a, WHITESPACE)) {
2508                         *(p++) = '\'';
2509                         p = stpcpy(p, *a);
2510                         *(p++) = '\'';
2511                 } else
2512                         p = stpcpy(p, *a);
2513
2514         }
2515
2516         *p = 0;
2517
2518         /* FIXME: this doesn't really handle arguments that have
2519          * spaces and ticks in them */
2520
2521         return n;
2522 }
2523
2524 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2525         _cleanup_free_ char *cmd = NULL;
2526         const char *prefix2;
2527
2528         assert(c);
2529         assert(f);
2530
2531         prefix = strempty(prefix);
2532         prefix2 = strappenda(prefix, "\t");
2533
2534         cmd = exec_command_line(c->argv);
2535         fprintf(f,
2536                 "%sCommand Line: %s\n",
2537                 prefix, cmd ? cmd : strerror(ENOMEM));
2538
2539         exec_status_dump(&c->exec_status, f, prefix2);
2540 }
2541
2542 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2543         assert(f);
2544
2545         prefix = strempty(prefix);
2546
2547         LIST_FOREACH(command, c, c)
2548                 exec_command_dump(c, f, prefix);
2549 }
2550
2551 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2552         ExecCommand *end;
2553
2554         assert(l);
2555         assert(e);
2556
2557         if (*l) {
2558                 /* It's kind of important, that we keep the order here */
2559                 LIST_FIND_TAIL(command, *l, end);
2560                 LIST_INSERT_AFTER(command, *l, end, e);
2561         } else
2562               *l = e;
2563 }
2564
2565 int exec_command_set(ExecCommand *c, const char *path, ...) {
2566         va_list ap;
2567         char **l, *p;
2568
2569         assert(c);
2570         assert(path);
2571
2572         va_start(ap, path);
2573         l = strv_new_ap(path, ap);
2574         va_end(ap);
2575
2576         if (!l)
2577                 return -ENOMEM;
2578
2579         p = strdup(path);
2580         if (!p) {
2581                 strv_free(l);
2582                 return -ENOMEM;
2583         }
2584
2585         free(c->path);
2586         c->path = p;
2587
2588         strv_free(c->argv);
2589         c->argv = l;
2590
2591         return 0;
2592 }
2593
2594 int exec_command_append(ExecCommand *c, const char *path, ...) {
2595         _cleanup_strv_free_ char **l = NULL;
2596         va_list ap;
2597         int r;
2598
2599         assert(c);
2600         assert(path);
2601
2602         va_start(ap, path);
2603         l = strv_new_ap(path, ap);
2604         va_end(ap);
2605
2606         if (!l)
2607                 return -ENOMEM;
2608
2609         r = strv_extend_strv(&c->argv, l);
2610         if (r < 0)
2611                 return r;
2612
2613         return 0;
2614 }
2615
2616
2617 static int exec_runtime_allocate(ExecRuntime **rt) {
2618
2619         if (*rt)
2620                 return 0;
2621
2622         *rt = new0(ExecRuntime, 1);
2623         if (!*rt)
2624                 return -ENOMEM;
2625
2626         (*rt)->n_ref = 1;
2627         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2628
2629         return 0;
2630 }
2631
2632 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2633         int r;
2634
2635         assert(rt);
2636         assert(c);
2637         assert(id);
2638
2639         if (*rt)
2640                 return 1;
2641
2642         if (!c->private_network && !c->private_tmp)
2643                 return 0;
2644
2645         r = exec_runtime_allocate(rt);
2646         if (r < 0)
2647                 return r;
2648
2649         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2650                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2651                         return -errno;
2652         }
2653
2654         if (c->private_tmp && !(*rt)->tmp_dir) {
2655                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2656                 if (r < 0)
2657                         return r;
2658         }
2659
2660         return 1;
2661 }
2662
2663 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2664         assert(r);
2665         assert(r->n_ref > 0);
2666
2667         r->n_ref++;
2668         return r;
2669 }
2670
2671 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2672
2673         if (!r)
2674                 return NULL;
2675
2676         assert(r->n_ref > 0);
2677
2678         r->n_ref--;
2679         if (r->n_ref <= 0) {
2680                 free(r->tmp_dir);
2681                 free(r->var_tmp_dir);
2682                 safe_close_pair(r->netns_storage_socket);
2683                 free(r);
2684         }
2685
2686         return NULL;
2687 }
2688
2689 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2690         assert(u);
2691         assert(f);
2692         assert(fds);
2693
2694         if (!rt)
2695                 return 0;
2696
2697         if (rt->tmp_dir)
2698                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2699
2700         if (rt->var_tmp_dir)
2701                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2702
2703         if (rt->netns_storage_socket[0] >= 0) {
2704                 int copy;
2705
2706                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2707                 if (copy < 0)
2708                         return copy;
2709
2710                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2711         }
2712
2713         if (rt->netns_storage_socket[1] >= 0) {
2714                 int copy;
2715
2716                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2717                 if (copy < 0)
2718                         return copy;
2719
2720                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2721         }
2722
2723         return 0;
2724 }
2725
2726 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2727         int r;
2728
2729         assert(rt);
2730         assert(key);
2731         assert(value);
2732
2733         if (streq(key, "tmp-dir")) {
2734                 char *copy;
2735
2736                 r = exec_runtime_allocate(rt);
2737                 if (r < 0)
2738                         return r;
2739
2740                 copy = strdup(value);
2741                 if (!copy)
2742                         return log_oom();
2743
2744                 free((*rt)->tmp_dir);
2745                 (*rt)->tmp_dir = copy;
2746
2747         } else if (streq(key, "var-tmp-dir")) {
2748                 char *copy;
2749
2750                 r = exec_runtime_allocate(rt);
2751                 if (r < 0)
2752                         return r;
2753
2754                 copy = strdup(value);
2755                 if (!copy)
2756                         return log_oom();
2757
2758                 free((*rt)->var_tmp_dir);
2759                 (*rt)->var_tmp_dir = copy;
2760
2761         } else if (streq(key, "netns-socket-0")) {
2762                 int fd;
2763
2764                 r = exec_runtime_allocate(rt);
2765                 if (r < 0)
2766                         return r;
2767
2768                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2769                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2770                 else {
2771                         safe_close((*rt)->netns_storage_socket[0]);
2772                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2773                 }
2774         } else if (streq(key, "netns-socket-1")) {
2775                 int fd;
2776
2777                 r = exec_runtime_allocate(rt);
2778                 if (r < 0)
2779                         return r;
2780
2781                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2782                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2783                 else {
2784                         safe_close((*rt)->netns_storage_socket[1]);
2785                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2786                 }
2787         } else
2788                 return 0;
2789
2790         return 1;
2791 }
2792
/* Thread entry point: passes the path given in p to rm_rf_dangerous() and
 * then frees the path string. The function owns p and always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2799
2800 void exec_runtime_destroy(ExecRuntime *rt) {
2801         int r;
2802
2803         if (!rt)
2804                 return;
2805
2806         /* If there are multiple users of this, let's leave the stuff around */
2807         if (rt->n_ref > 1)
2808                 return;
2809
2810         if (rt->tmp_dir) {
2811                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2812
2813                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2814                 if (r < 0) {
2815                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2816                         free(rt->tmp_dir);
2817                 }
2818
2819                 rt->tmp_dir = NULL;
2820         }
2821
2822         if (rt->var_tmp_dir) {
2823                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2824
2825                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2826                 if (r < 0) {
2827                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2828                         free(rt->var_tmp_dir);
2829                 }
2830
2831                 rt->var_tmp_dir = NULL;
2832         }
2833
2834         safe_close_pair(rt->netns_storage_socket);
2835 }
2836
2837 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2838         [EXEC_INPUT_NULL] = "null",
2839         [EXEC_INPUT_TTY] = "tty",
2840         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2841         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2842         [EXEC_INPUT_SOCKET] = "socket"
2843 };
2844
2845 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2846
2847 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2848         [EXEC_OUTPUT_INHERIT] = "inherit",
2849         [EXEC_OUTPUT_NULL] = "null",
2850         [EXEC_OUTPUT_TTY] = "tty",
2851         [EXEC_OUTPUT_SYSLOG] = "syslog",
2852         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2853         [EXEC_OUTPUT_KMSG] = "kmsg",
2854         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2855         [EXEC_OUTPUT_JOURNAL] = "journal",
2856         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2857         [EXEC_OUTPUT_SOCKET] = "socket"
2858 };
2859
2860 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);