chiark / gitweb /
db755777c130a90939aae4cc3f57a339db37f6f9
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "bus-kernel.h"
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
/* Idle timeouts, expressed in microseconds: 5 s and 1 s respectively. */
#define IDLE_TIMEOUT_USEC  (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode expected on a terminal we take ownership of.
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the logger socket (8 MiB). */
#define SNDBUF_SIZE (8 * 1024 * 1024)
99
/* Moves the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. packed right behind stdin, stdout and stderr. The
 * array is updated in place. Returns 0 on success, or a negative errno
 * if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, moved;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* We asked for the lowest free fd >= wanted. If
                         * we got a different one the slot is still
                         * occupied, so remember the first such index
                         * and make another pass starting from there. */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
143
/* Prepares the passed fds for being handed to a child: O_NONBLOCK is
 * set or cleared according to 'nonblock', and FD_CLOEXEC is always
 * cleared. Returns 0 on success, or the first negative error returned
 * by one of the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: the whole
                 * point is to pass these fds on to our children. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
170
171 _pure_ static const char *tty_path(const ExecContext *context) {
172         assert(context);
173
174         if (context->tty_path)
175                 return context->tty_path;
176
177         return "/dev/console";
178 }
179
180 static void exec_context_tty_reset(const ExecContext *context) {
181         assert(context);
182
183         if (context->tty_vhangup)
184                 terminal_vhangup(tty_path(context));
185
186         if (context->tty_reset)
187                 reset_terminal(tty_path(context));
188
189         if (context->tty_vt_disallocate && context->tty_path)
190                 vt_disallocate(context->tty_path);
191 }
192
193 static bool is_terminal_output(ExecOutput o) {
194         return
195                 o == EXEC_OUTPUT_TTY ||
196                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
199 }
200
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and
 * makes it available as fd number 'nfd'. Returns nfd on success, or a
 * negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, result = nfd;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Already got the right number? Then we are done. */
        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                result = -errno;

        safe_close(fd);

        return result;
}
218
219 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220         int fd, r;
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225
226         assert(context);
227         assert(output < _EXEC_OUTPUT_MAX);
228         assert(ident);
229         assert(nfd >= 0);
230
231         fd = socket(AF_UNIX, SOCK_STREAM, 0);
232         if (fd < 0)
233                 return -errno;
234
235         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
236         if (r < 0) {
237                 safe_close(fd);
238                 return -errno;
239         }
240
241         if (shutdown(fd, SHUT_RD) < 0) {
242                 safe_close(fd);
243                 return -errno;
244         }
245
246         fd_inc_sndbuf(fd, SNDBUF_SIZE);
247
248         dprintf(fd,
249                 "%s\n"
250                 "%s\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n",
256                 context->syslog_identifier ? context->syslog_identifier : ident,
257                 unit_id,
258                 context->syslog_priority,
259                 !!context->syslog_level_prefix,
260                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
261                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
262                 is_terminal_output(output));
263
264         if (fd != nfd) {
265                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
266                 safe_close(fd);
267         } else
268                 r = nfd;
269
270         return r;
271 }
/* Opens the terminal at 'path' with the given flags (plus O_NOCTTY)
 * and makes it available as fd number 'nfd'. Returns nfd on success,
 * or a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, result = nfd;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                result = -errno;

        safe_close(fd);

        return result;
}
289
290 static bool is_terminal_input(ExecInput i) {
291         return
292                 i == EXEC_INPUT_TTY ||
293                 i == EXEC_INPUT_TTY_FORCE ||
294                 i == EXEC_INPUT_TTY_FAIL;
295 }
296
297 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298
299         if (is_terminal_input(std_input) && !apply_tty_stdin)
300                 return EXEC_INPUT_NULL;
301
302         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
303                 return EXEC_INPUT_NULL;
304
305         return std_input;
306 }
307
308 static int fixup_output(ExecOutput std_output, int socket_fd) {
309
310         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
311                 return EXEC_OUTPUT_INHERIT;
312
313         return std_output;
314 }
315
316 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
317         ExecInput i;
318
319         assert(context);
320
321         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322
323         switch (i) {
324
325         case EXEC_INPUT_NULL:
326                 return open_null_as(O_RDONLY, STDIN_FILENO);
327
328         case EXEC_INPUT_TTY:
329         case EXEC_INPUT_TTY_FORCE:
330         case EXEC_INPUT_TTY_FAIL: {
331                 int fd, r;
332
333                 fd = acquire_terminal(tty_path(context),
334                                       i == EXEC_INPUT_TTY_FAIL,
335                                       i == EXEC_INPUT_TTY_FORCE,
336                                       false,
337                                       USEC_INFINITY);
338                 if (fd < 0)
339                         return fd;
340
341                 if (fd != STDIN_FILENO) {
342                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
343                         safe_close(fd);
344                 } else
345                         r = STDIN_FILENO;
346
347                 return r;
348         }
349
350         case EXEC_INPUT_SOCKET:
351                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352
353         default:
354                 assert_not_reached("Unknown input type");
355         }
356 }
357
358 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
359         ExecOutput o;
360         ExecInput i;
361         int r;
362
363         assert(context);
364         assert(ident);
365
366         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367         o = fixup_output(context->std_output, socket_fd);
368
369         if (fileno == STDERR_FILENO) {
370                 ExecOutput e;
371                 e = fixup_output(context->std_error, socket_fd);
372
373                 /* This expects the input and output are already set up */
374
375                 /* Don't change the stderr file descriptor if we inherit all
376                  * the way and are not on a tty */
377                 if (e == EXEC_OUTPUT_INHERIT &&
378                     o == EXEC_OUTPUT_INHERIT &&
379                     i == EXEC_INPUT_NULL &&
380                     !is_terminal_input(context->std_input) &&
381                     getppid () != 1)
382                         return fileno;
383
384                 /* Duplicate from stdout if possible */
385                 if (e == o || e == EXEC_OUTPUT_INHERIT)
386                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
387
388                 o = e;
389
390         } else if (o == EXEC_OUTPUT_INHERIT) {
391                 /* If input got downgraded, inherit the original value */
392                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
393                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394
395                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
396                 if (i != EXEC_INPUT_NULL)
397                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398
399                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
400                 if (getppid() != 1)
401                         return fileno;
402
403                 /* We need to open /dev/null here anew, to get the right access mode. */
404                 return open_null_as(O_WRONLY, fileno);
405         }
406
407         switch (o) {
408
409         case EXEC_OUTPUT_NULL:
410                 return open_null_as(O_WRONLY, fileno);
411
412         case EXEC_OUTPUT_TTY:
413                 if (is_terminal_input(i))
414                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415
416                 /* We don't reset the terminal if this is just about output */
417                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418
419         case EXEC_OUTPUT_SYSLOG:
420         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421         case EXEC_OUTPUT_KMSG:
422         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423         case EXEC_OUTPUT_JOURNAL:
424         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425                 r = connect_logger_as(context, o, ident, unit_id, fileno);
426                 if (r < 0) {
427                         log_struct_unit(LOG_CRIT, unit_id,
428                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
429                                 fileno == STDOUT_FILENO ? "out" : "err",
430                                 unit_id, strerror(-r),
431                                 "ERRNO=%d", -r,
432                                 NULL);
433                         r = open_null_as(O_WRONLY, fileno);
434                 }
435                 return r;
436
437         case EXEC_OUTPUT_SOCKET:
438                 assert(socket_fd >= 0);
439                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
440
441         default:
442                 assert_not_reached("Unknown error type");
443         }
444 }
445
446 static int chown_terminal(int fd, uid_t uid) {
447         struct stat st;
448
449         assert(fd >= 0);
450
451         /* This might fail. What matters are the results. */
452         (void) fchown(fd, uid, -1);
453         (void) fchmod(fd, TTY_MODE);
454
455         if (fstat(fd, &st) < 0)
456                 return -errno;
457
458         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
459                 return -EPERM;
460
461         return 0;
462 }
463
464 static int setup_confirm_stdio(int *_saved_stdin,
465                                int *_saved_stdout) {
466         int fd = -1, saved_stdin, saved_stdout = -1, r;
467
468         assert(_saved_stdin);
469         assert(_saved_stdout);
470
471         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
472         if (saved_stdin < 0)
473                 return -errno;
474
475         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
476         if (saved_stdout < 0) {
477                 r = errno;
478                 goto fail;
479         }
480
481         fd = acquire_terminal(
482                         "/dev/console",
483                         false,
484                         false,
485                         false,
486                         DEFAULT_CONFIRM_USEC);
487         if (fd < 0) {
488                 r = fd;
489                 goto fail;
490         }
491
492         r = chown_terminal(fd, getuid());
493         if (r < 0)
494                 goto fail;
495
496         if (dup2(fd, STDIN_FILENO) < 0) {
497                 r = -errno;
498                 goto fail;
499         }
500
501         if (dup2(fd, STDOUT_FILENO) < 0) {
502                 r = -errno;
503                 goto fail;
504         }
505
506         if (fd >= 2)
507                 safe_close(fd);
508
509         *_saved_stdin = saved_stdin;
510         *_saved_stdout = saved_stdout;
511
512         return 0;
513
514 fail:
515         safe_close(saved_stdout);
516         safe_close(saved_stdin);
517         safe_close(fd);
518
519         return r;
520 }
521
522 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
523         _cleanup_close_ int fd = -1;
524         va_list ap;
525
526         assert(format);
527
528         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
529         if (fd < 0)
530                 return fd;
531
532         va_start(ap, format);
533         vdprintf(fd, format, ap);
534         va_end(ap);
535
536         return 0;
537 }
538
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin/stdout and closes the saved
 * copies. Returns 0 on success, or the negative errno of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
562
563 static int ask_for_confirmation(char *response, char **argv) {
564         int saved_stdout = -1, saved_stdin = -1, r;
565         _cleanup_free_ char *line = NULL;
566
567         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
568         if (r < 0)
569                 return r;
570
571         line = exec_command_line(argv);
572         if (!line)
573                 return -ENOMEM;
574
575         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
576
577         restore_confirm_stdio(&saved_stdin, &saved_stdout);
578
579         return r;
580 }
581
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583         bool keep_groups = false;
584         int r;
585
586         assert(context);
587
588         /* Lookup and set GID and supplementary group list. Here too
589          * we avoid NSS lookups for gid=0. */
590
591         if (context->group || username) {
592
593                 if (context->group) {
594                         const char *g = context->group;
595
596                         if ((r = get_group_creds(&g, &gid)) < 0)
597                                 return r;
598                 }
599
600                 /* First step, initialize groups from /etc/groups */
601                 if (username && gid != 0) {
602                         if (initgroups(username, gid) < 0)
603                                 return -errno;
604
605                         keep_groups = true;
606                 }
607
608                 /* Second step, set our gids */
609                 if (setresgid(gid, gid, gid) < 0)
610                         return -errno;
611         }
612
613         if (context->supplementary_groups) {
614                 int ngroups_max, k;
615                 gid_t *gids;
616                 char **i;
617
618                 /* Final step, initialize any manually set supplementary groups */
619                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620
621                 if (!(gids = new(gid_t, ngroups_max)))
622                         return -ENOMEM;
623
624                 if (keep_groups) {
625                         if ((k = getgroups(ngroups_max, gids)) < 0) {
626                                 free(gids);
627                                 return -errno;
628                         }
629                 } else
630                         k = 0;
631
632                 STRV_FOREACH(i, context->supplementary_groups) {
633                         const char *g;
634
635                         if (k >= ngroups_max) {
636                                 free(gids);
637                                 return -E2BIG;
638                         }
639
640                         g = *i;
641                         r = get_group_creds(&g, gids+k);
642                         if (r < 0) {
643                                 free(gids);
644                                 return r;
645                         }
646
647                         k++;
648                 }
649
650                 if (setgroups(k, gids) < 0) {
651                         free(gids);
652                         return -errno;
653                 }
654
655                 free(gids);
656         }
657
658         return 0;
659 }
660
661 static int enforce_user(const ExecContext *context, uid_t uid) {
662         assert(context);
663
664         /* Sets (but doesn't lookup) the uid and make sure we keep the
665          * capabilities while doing so. */
666
667         if (context->capabilities) {
668                 _cleanup_cap_free_ cap_t d = NULL;
669                 static const cap_value_t bits[] = {
670                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
671                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
672                 };
673
674                 /* First step: If we need to keep capabilities but
675                  * drop privileges we need to make sure we keep our
676                  * caps, while we drop privileges. */
677                 if (uid != 0) {
678                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
679
680                         if (prctl(PR_GET_SECUREBITS) != sb)
681                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682                                         return -errno;
683                 }
684
685                 /* Second step: set the capabilities. This will reduce
686                  * the capabilities to the minimum we need. */
687
688                 d = cap_dup(context->capabilities);
689                 if (!d)
690                         return -errno;
691
692                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
694                         return -errno;
695
696                 if (cap_set_proc(d) < 0)
697                         return -errno;
698         }
699
700         /* Third step: actually set the uids */
701         if (setresuid(uid, uid, uid) < 0)
702                 return -errno;
703
704         /* At this point we should have all necessary capabilities but
705            are otherwise a normal user. However, the caps might got
706            corrupted due to the setresuid() so we need clean them up
707            later. This is done outside of this call. */
708
709         return 0;
710 }
711
712 #ifdef HAVE_PAM
713
714 static int null_conv(
715                 int num_msg,
716                 const struct pam_message **msg,
717                 struct pam_response **resp,
718                 void *appdata_ptr) {
719
720         /* We don't support conversations */
721
722         return PAM_CONV_ERR;
723 }
724
725 static int setup_pam(
726                 const char *name,
727                 const char *user,
728                 uid_t uid,
729                 const char *tty,
730                 char ***pam_env,
731                 int fds[], unsigned n_fds) {
732
733         static const struct pam_conv conv = {
734                 .conv = null_conv,
735                 .appdata_ptr = NULL
736         };
737
738         pam_handle_t *handle = NULL;
739         sigset_t ss, old_ss;
740         int pam_code = PAM_SUCCESS;
741         int err;
742         char **e = NULL;
743         bool close_session = false;
744         pid_t pam_pid = 0, parent_pid;
745         int flags = 0;
746
747         assert(name);
748         assert(user);
749         assert(pam_env);
750
751         /* We set up PAM in the parent process, then fork. The child
752          * will then stay around until killed via PR_GET_PDEATHSIG or
753          * systemd via the cgroup logic. It will then remove the PAM
754          * session again. The parent process will exec() the actual
755          * daemon. We do things this way to ensure that the main PID
756          * of the daemon is the one we initially fork()ed. */
757
758         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
759                 flags |= PAM_SILENT;
760
761         pam_code = pam_start(name, user, &conv, &handle);
762         if (pam_code != PAM_SUCCESS) {
763                 handle = NULL;
764                 goto fail;
765         }
766
767         if (tty) {
768                 pam_code = pam_set_item(handle, PAM_TTY, tty);
769                 if (pam_code != PAM_SUCCESS)
770                         goto fail;
771         }
772
773         pam_code = pam_acct_mgmt(handle, flags);
774         if (pam_code != PAM_SUCCESS)
775                 goto fail;
776
777         pam_code = pam_open_session(handle, flags);
778         if (pam_code != PAM_SUCCESS)
779                 goto fail;
780
781         close_session = true;
782
783         e = pam_getenvlist(handle);
784         if (!e) {
785                 pam_code = PAM_BUF_ERR;
786                 goto fail;
787         }
788
789         /* Block SIGTERM, so that we know that it won't get lost in
790          * the child */
791         if (sigemptyset(&ss) < 0 ||
792             sigaddset(&ss, SIGTERM) < 0 ||
793             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
794                 goto fail;
795
796         parent_pid = getpid();
797
798         pam_pid = fork();
799         if (pam_pid < 0)
800                 goto fail;
801
802         if (pam_pid == 0) {
803                 int sig;
804                 int r = EXIT_PAM;
805
806                 /* The child's job is to reset the PAM session on
807                  * termination */
808
809                 /* This string must fit in 10 chars (i.e. the length
810                  * of "/sbin/init"), to look pretty in /bin/ps */
811                 rename_process("(sd-pam)");
812
813                 /* Make sure we don't keep open the passed fds in this
814                 child. We assume that otherwise only those fds are
815                 open here that have been opened by PAM. */
816                 close_many(fds, n_fds);
817
818                 /* Drop privileges - we don't need any to pam_close_session
819                  * and this will make PR_SET_PDEATHSIG work in most cases.
820                  * If this fails, ignore the error - but expect sd-pam threads
821                  * to fail to exit normally */
822                 if (setresuid(uid, uid, uid) < 0)
823                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
824
825                 /* Wait until our parent died. This will only work if
826                  * the above setresuid() succeeds, otherwise the kernel
827                  * will not allow unprivileged parents kill their privileged
828                  * children this way. We rely on the control groups kill logic
829                  * to do the rest for us. */
830                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
831                         goto child_finish;
832
833                 /* Check if our parent process might already have
834                  * died? */
835                 if (getppid() == parent_pid) {
836                         for (;;) {
837                                 if (sigwait(&ss, &sig) < 0) {
838                                         if (errno == EINTR)
839                                                 continue;
840
841                                         goto child_finish;
842                                 }
843
844                                 assert(sig == SIGTERM);
845                                 break;
846                         }
847                 }
848
849                 /* If our parent died we'll end the session */
850                 if (getppid() != parent_pid) {
851                         pam_code = pam_close_session(handle, flags);
852                         if (pam_code != PAM_SUCCESS)
853                                 goto child_finish;
854                 }
855
856                 r = 0;
857
858         child_finish:
859                 pam_end(handle, pam_code | flags);
860                 _exit(r);
861         }
862
863         /* If the child was forked off successfully it will do all the
864          * cleanups, so forget about the handle here. */
865         handle = NULL;
866
867         /* Unblock SIGTERM again in the parent */
868         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
869                 goto fail;
870
871         /* We close the log explicitly here, since the PAM modules
872          * might have opened it, but we don't want this fd around. */
873         closelog();
874
875         *pam_env = e;
876         e = NULL;
877
878         return 0;
879
880 fail:
881         if (pam_code != PAM_SUCCESS) {
882                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
883                 err = -EPERM;  /* PAM errors do not map to errno */
884         } else {
885                 log_error("PAM failed: %m");
886                 err = -errno;
887         }
888
889         if (handle) {
890                 if (close_session)
891                         pam_code = pam_close_session(handle, flags);
892
893                 pam_end(handle, pam_code | flags);
894         }
895
896         strv_free(e);
897
898         closelog();
899
900         if (pam_pid > 1) {
901                 kill(pam_pid, SIGTERM);
902                 kill(pam_pid, SIGCONT);
903         }
904
905         return err;
906 }
907 #endif
908
/* Renames the calling process to "(NAME)", where NAME is the last
 * path component of 'path', cut down to its final 8 characters. If
 * that component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *name;
        size_t len;

        /* The resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it usually is the more interesting
                 * part, since the beginning might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, name, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
939
940 #ifdef HAVE_SECCOMP
941
942 static int apply_seccomp(const ExecContext *c) {
943         uint32_t negative_action, action;
944         scmp_filter_ctx *seccomp;
945         Iterator i;
946         void *id;
947         int r;
948
949         assert(c);
950
951         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
952
953         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
954         if (!seccomp)
955                 return -ENOMEM;
956
957         if (c->syscall_archs) {
958
959                 SET_FOREACH(id, c->syscall_archs, i) {
960                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
961                         if (r == -EEXIST)
962                                 continue;
963                         if (r < 0)
964                                 goto finish;
965                 }
966
967         } else {
968                 r = seccomp_add_secondary_archs(seccomp);
969                 if (r < 0)
970                         goto finish;
971         }
972
973         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
974         SET_FOREACH(id, c->syscall_filter, i) {
975                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
976                 if (r < 0)
977                         goto finish;
978         }
979
980         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
981         if (r < 0)
982                 goto finish;
983
984         r = seccomp_load(seccomp);
985
986 finish:
987         seccomp_release(seccomp);
988         return r;
989 }
990
991 static int apply_address_families(const ExecContext *c) {
992         scmp_filter_ctx *seccomp;
993         Iterator i;
994         int r;
995
996         assert(c);
997
998         seccomp = seccomp_init(SCMP_ACT_ALLOW);
999         if (!seccomp)
1000                 return -ENOMEM;
1001
1002         r = seccomp_add_secondary_archs(seccomp);
1003         if (r < 0)
1004                 goto finish;
1005
1006         if (c->address_families_whitelist) {
1007                 int af, first = 0, last = 0;
1008                 void *afp;
1009
1010                 /* If this is a whitelist, we first block the address
1011                  * families that are out of range and then everything
1012                  * that is not in the set. First, we find the lowest
1013                  * and highest address family in the set. */
1014
1015                 SET_FOREACH(afp, c->address_families, i) {
1016                         af = PTR_TO_INT(afp);
1017
1018                         if (af <= 0 || af >= af_max())
1019                                 continue;
1020
1021                         if (first == 0 || af < first)
1022                                 first = af;
1023
1024                         if (last == 0 || af > last)
1025                                 last = af;
1026                 }
1027
1028                 assert((first == 0) == (last == 0));
1029
1030                 if (first == 0) {
1031
1032                         /* No entries in the valid range, block everything */
1033                         r = seccomp_rule_add(
1034                                         seccomp,
1035                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1036                                         SCMP_SYS(socket),
1037                                         0);
1038                         if (r < 0)
1039                                 goto finish;
1040
1041                 } else {
1042
1043                         /* Block everything below the first entry */
1044                         r = seccomp_rule_add(
1045                                         seccomp,
1046                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1047                                         SCMP_SYS(socket),
1048                                         1,
1049                                         SCMP_A0(SCMP_CMP_LT, first));
1050                         if (r < 0)
1051                                 goto finish;
1052
1053                         /* Block everything above the last entry */
1054                         r = seccomp_rule_add(
1055                                         seccomp,
1056                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1057                                         SCMP_SYS(socket),
1058                                         1,
1059                                         SCMP_A0(SCMP_CMP_GT, last));
1060                         if (r < 0)
1061                                 goto finish;
1062
1063                         /* Block everything between the first and last
1064                          * entry */
1065                         for (af = 1; af < af_max(); af++) {
1066
1067                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1068                                         continue;
1069
1070                                 r = seccomp_rule_add(
1071                                                 seccomp,
1072                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1073                                                 SCMP_SYS(socket),
1074                                                 1,
1075                                                 SCMP_A0(SCMP_CMP_EQ, af));
1076                                 if (r < 0)
1077                                         goto finish;
1078                         }
1079                 }
1080
1081         } else {
1082                 void *af;
1083
1084                 /* If this is a blacklist, then generate one rule for
1085                  * each address family that are then combined in OR
1086                  * checks. */
1087
1088                 SET_FOREACH(af, c->address_families, i) {
1089
1090                         r = seccomp_rule_add(
1091                                         seccomp,
1092                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1093                                         SCMP_SYS(socket),
1094                                         1,
1095                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1096                         if (r < 0)
1097                                 goto finish;
1098                 }
1099         }
1100
1101         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1102         if (r < 0)
1103                 goto finish;
1104
1105         r = seccomp_load(seccomp);
1106
1107 finish:
1108         seccomp_release(seccomp);
1109         return r;
1110 }
1111
1112 #endif
1113
1114 static void do_idle_pipe_dance(int idle_pipe[4]) {
1115         assert(idle_pipe);
1116
1117
1118         safe_close(idle_pipe[1]);
1119         safe_close(idle_pipe[2]);
1120
1121         if (idle_pipe[0] >= 0) {
1122                 int r;
1123
1124                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1125
1126                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1127                         /* Signal systemd that we are bored and want to continue. */
1128                         write(idle_pipe[3], "x", 1);
1129
1130                         /* Wait for systemd to react to the signal above. */
1131                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1132                 }
1133
1134                 safe_close(idle_pipe[0]);
1135
1136         }
1137
1138         safe_close(idle_pipe[3]);
1139 }
1140
1141 static int build_environment(
1142                 const ExecContext *c,
1143                 unsigned n_fds,
1144                 usec_t watchdog_usec,
1145                 const char *home,
1146                 const char *username,
1147                 const char *shell,
1148                 char ***ret) {
1149
1150         _cleanup_strv_free_ char **our_env = NULL;
1151         unsigned n_env = 0;
1152         char *x;
1153
1154         assert(c);
1155         assert(ret);
1156
1157         our_env = new0(char*, 10);
1158         if (!our_env)
1159                 return -ENOMEM;
1160
1161         if (n_fds > 0) {
1162                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1163                         return -ENOMEM;
1164                 our_env[n_env++] = x;
1165
1166                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1167                         return -ENOMEM;
1168                 our_env[n_env++] = x;
1169         }
1170
1171         if (watchdog_usec > 0) {
1172                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1173                         return -ENOMEM;
1174                 our_env[n_env++] = x;
1175
1176                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1177                         return -ENOMEM;
1178                 our_env[n_env++] = x;
1179         }
1180
1181         if (home) {
1182                 x = strappend("HOME=", home);
1183                 if (!x)
1184                         return -ENOMEM;
1185                 our_env[n_env++] = x;
1186         }
1187
1188         if (username) {
1189                 x = strappend("LOGNAME=", username);
1190                 if (!x)
1191                         return -ENOMEM;
1192                 our_env[n_env++] = x;
1193
1194                 x = strappend("USER=", username);
1195                 if (!x)
1196                         return -ENOMEM;
1197                 our_env[n_env++] = x;
1198         }
1199
1200         if (shell) {
1201                 x = strappend("SHELL=", shell);
1202                 if (!x)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205         }
1206
1207         if (is_terminal_input(c->std_input) ||
1208             c->std_output == EXEC_OUTPUT_TTY ||
1209             c->std_error == EXEC_OUTPUT_TTY ||
1210             c->tty_path) {
1211
1212                 x = strdup(default_term_for_tty(tty_path(c)));
1213                 if (!x)
1214                         return -ENOMEM;
1215                 our_env[n_env++] = x;
1216         }
1217
1218         our_env[n_env++] = NULL;
1219         assert(n_env <= 10);
1220
1221         *ret = our_env;
1222         our_env = NULL;
1223
1224         return 0;
1225 }
1226
1227 static int exec_child(ExecCommand *command,
1228                       const ExecContext *context,
1229                       const ExecParameters *params,
1230                       ExecRuntime *runtime,
1231                       char **argv,
1232                       int socket_fd,
1233                       int *fds, unsigned n_fds,
1234                       char **files_env,
1235                       int *error) {
1236
1237         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1238         const char *username = NULL, *home = NULL, *shell = NULL;
1239         unsigned n_dont_close = 0;
1240         int dont_close[n_fds + 4];
1241         uid_t uid = (uid_t) -1;
1242         gid_t gid = (gid_t) -1;
1243         int i, err;
1244
1245         assert(command);
1246         assert(context);
1247         assert(params);
1248         assert(error);
1249
1250         rename_process_from_path(command->path);
1251
1252         /* We reset exactly these signals, since they are the
1253          * only ones we set to SIG_IGN in the main daemon. All
1254          * others we leave untouched because we set them to
1255          * SIG_DFL or a valid handler initially, both of which
1256          * will be demoted to SIG_DFL. */
1257         default_signals(SIGNALS_CRASH_HANDLER,
1258                         SIGNALS_IGNORE, -1);
1259
1260         if (context->ignore_sigpipe)
1261                 ignore_signals(SIGPIPE, -1);
1262
1263         err = reset_signal_mask();
1264         if (err < 0) {
1265                 *error = EXIT_SIGNAL_MASK;
1266                 return err;
1267         }
1268
1269         if (params->idle_pipe)
1270                 do_idle_pipe_dance(params->idle_pipe);
1271
1272         /* Close sockets very early to make sure we don't
1273          * block init reexecution because it cannot bind its
1274          * sockets */
1275         log_forget_fds();
1276
1277         if (socket_fd >= 0)
1278                 dont_close[n_dont_close++] = socket_fd;
1279         if (n_fds > 0) {
1280                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1281                 n_dont_close += n_fds;
1282         }
1283         if (params->bus_endpoint_fd >= 0)
1284                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1285         if (runtime) {
1286                 if (runtime->netns_storage_socket[0] >= 0)
1287                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1288                 if (runtime->netns_storage_socket[1] >= 0)
1289                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1290         }
1291
1292         err = close_all_fds(dont_close, n_dont_close);
1293         if (err < 0) {
1294                 *error = EXIT_FDS;
1295                 return err;
1296         }
1297
1298         if (!context->same_pgrp)
1299                 if (setsid() < 0) {
1300                         *error = EXIT_SETSID;
1301                         return -errno;
1302                 }
1303
1304         exec_context_tty_reset(context);
1305
1306         if (params->confirm_spawn) {
1307                 char response;
1308
1309                 err = ask_for_confirmation(&response, argv);
1310                 if (err == -ETIMEDOUT)
1311                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1312                 else if (err < 0)
1313                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1314                 else if (response == 's') {
1315                         write_confirm_message("Skipping execution.\n");
1316                         *error = EXIT_CONFIRM;
1317                         return -ECANCELED;
1318                 } else if (response == 'n') {
1319                         write_confirm_message("Failing execution.\n");
1320                         *error = 0;
1321                         return 0;
1322                 }
1323         }
1324
1325         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1326          * must sure to drop O_NONBLOCK */
1327         if (socket_fd >= 0)
1328                 fd_nonblock(socket_fd, false);
1329
1330         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1331         if (err < 0) {
1332                 *error = EXIT_STDIN;
1333                 return err;
1334         }
1335
1336         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1337         if (err < 0) {
1338                 *error = EXIT_STDOUT;
1339                 return err;
1340         }
1341
1342         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1343         if (err < 0) {
1344                 *error = EXIT_STDERR;
1345                 return err;
1346         }
1347
1348         if (params->cgroup_path) {
1349                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1350                 if (err < 0) {
1351                         *error = EXIT_CGROUP;
1352                         return err;
1353                 }
1354         }
1355
1356         if (context->oom_score_adjust_set) {
1357                 char t[16];
1358
1359                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1360                 char_array_0(t);
1361
1362                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1363                         *error = EXIT_OOM_ADJUST;
1364                         return -errno;
1365                 }
1366         }
1367
1368         if (context->nice_set)
1369                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1370                         *error = EXIT_NICE;
1371                         return -errno;
1372                 }
1373
1374         if (context->cpu_sched_set) {
1375                 struct sched_param param = {
1376                         .sched_priority = context->cpu_sched_priority,
1377                 };
1378
1379                 err = sched_setscheduler(0,
1380                                          context->cpu_sched_policy |
1381                                          (context->cpu_sched_reset_on_fork ?
1382                                           SCHED_RESET_ON_FORK : 0),
1383                                          &param);
1384                 if (err < 0) {
1385                         *error = EXIT_SETSCHEDULER;
1386                         return -errno;
1387                 }
1388         }
1389
1390         if (context->cpuset)
1391                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1392                         *error = EXIT_CPUAFFINITY;
1393                         return -errno;
1394                 }
1395
1396         if (context->ioprio_set)
1397                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1398                         *error = EXIT_IOPRIO;
1399                         return -errno;
1400                 }
1401
1402         if (context->timer_slack_nsec != NSEC_INFINITY)
1403                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1404                         *error = EXIT_TIMERSLACK;
1405                         return -errno;
1406                 }
1407
1408         if (context->personality != 0xffffffffUL)
1409                 if (personality(context->personality) < 0) {
1410                         *error = EXIT_PERSONALITY;
1411                         return -errno;
1412                 }
1413
1414         if (context->utmp_id)
1415                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1416
1417         if (context->user) {
1418                 username = context->user;
1419                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1420                 if (err < 0) {
1421                         *error = EXIT_USER;
1422                         return err;
1423                 }
1424
1425                 if (is_terminal_input(context->std_input)) {
1426                         err = chown_terminal(STDIN_FILENO, uid);
1427                         if (err < 0) {
1428                                 *error = EXIT_STDIN;
1429                                 return err;
1430                         }
1431                 }
1432         }
1433
1434 #ifdef ENABLE_KDBUS
1435         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1436                 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1437
1438                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1439                 if (err < 0) {
1440                         *error = EXIT_BUS_ENDPOINT;
1441                         return err;
1442                 }
1443         }
1444 #endif
1445
1446 #ifdef HAVE_PAM
1447         if (params->cgroup_path && context->user && context->pam_name) {
1448                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1449                 if (err < 0) {
1450                         *error = EXIT_CGROUP;
1451                         return err;
1452                 }
1453
1454
1455                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1456                 if (err < 0) {
1457                         *error = EXIT_CGROUP;
1458                         return err;
1459                 }
1460         }
1461 #endif
1462
1463         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1464                 char **rt;
1465
1466                 STRV_FOREACH(rt, context->runtime_directory) {
1467                         _cleanup_free_ char *p;
1468
1469                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1470                         if (!p) {
1471                                 *error = EXIT_RUNTIME_DIRECTORY;
1472                                 return -ENOMEM;
1473                         }
1474
1475                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1476                         if (err < 0) {
1477                                 *error = EXIT_RUNTIME_DIRECTORY;
1478                                 return err;
1479                         }
1480                 }
1481         }
1482
1483         if (params->apply_permissions) {
1484                 err = enforce_groups(context, username, gid);
1485                 if (err < 0) {
1486                         *error = EXIT_GROUP;
1487                         return err;
1488                 }
1489         }
1490
1491         umask(context->umask);
1492
1493 #ifdef HAVE_PAM
1494         if (params->apply_permissions && context->pam_name && username) {
1495                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1496                 if (err < 0) {
1497                         *error = EXIT_PAM;
1498                         return err;
1499                 }
1500         }
1501 #endif
1502
1503         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1504                 err = setup_netns(runtime->netns_storage_socket);
1505                 if (err < 0) {
1506                         *error = EXIT_NETWORK;
1507                         return err;
1508                 }
1509         }
1510
1511         if (!strv_isempty(context->read_write_dirs) ||
1512             !strv_isempty(context->read_only_dirs) ||
1513             !strv_isempty(context->inaccessible_dirs) ||
1514             context->mount_flags != 0 ||
1515             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1516             params->bus_endpoint_path ||
1517             context->private_devices ||
1518             context->protect_system != PROTECT_SYSTEM_NO ||
1519             context->protect_home != PROTECT_HOME_NO) {
1520
1521                 char *tmp = NULL, *var = NULL;
1522
1523                 /* The runtime struct only contains the parent
1524                  * of the private /tmp, which is
1525                  * non-accessible to world users. Inside of it
1526                  * there's a /tmp that is sticky, and that's
1527                  * the one we want to use here. */
1528
1529                 if (context->private_tmp && runtime) {
1530                         if (runtime->tmp_dir)
1531                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1532                         if (runtime->var_tmp_dir)
1533                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1534                 }
1535
1536                 err = setup_namespace(
1537                                 context->read_write_dirs,
1538                                 context->read_only_dirs,
1539                                 context->inaccessible_dirs,
1540                                 tmp,
1541                                 var,
1542                                 params->bus_endpoint_path,
1543                                 context->private_devices,
1544                                 context->protect_home,
1545                                 context->protect_system,
1546                                 context->mount_flags);
1547                 if (err < 0) {
1548                         *error = EXIT_NAMESPACE;
1549                         return err;
1550                 }
1551         }
1552
1553         if (params->apply_chroot) {
1554                 if (context->root_directory)
1555                         if (chroot(context->root_directory) < 0) {
1556                                 *error = EXIT_CHROOT;
1557                                 return -errno;
1558                         }
1559
1560                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1561                         *error = EXIT_CHDIR;
1562                         return -errno;
1563                 }
1564         } else {
1565                 _cleanup_free_ char *d = NULL;
1566
1567                 if (asprintf(&d, "%s/%s",
1568                              context->root_directory ? context->root_directory : "",
1569                              context->working_directory ? context->working_directory : "") < 0) {
1570                         *error = EXIT_MEMORY;
1571                         return -ENOMEM;
1572                 }
1573
1574                 if (chdir(d) < 0) {
1575                         *error = EXIT_CHDIR;
1576                         return -errno;
1577                 }
1578         }
1579
1580         /* We repeat the fd closing here, to make sure that
1581          * nothing is leaked from the PAM modules. Note that
1582          * we are more aggressive this time since socket_fd
1583          * and the netns fds we don't need anymore. The custom
1584          * endpoint fd was needed to upload the policy and can
1585          * now be closed as well. */
1586         err = close_all_fds(fds, n_fds);
1587         if (err >= 0)
1588                 err = shift_fds(fds, n_fds);
1589         if (err >= 0)
1590                 err = flags_fds(fds, n_fds, context->non_blocking);
1591         if (err < 0) {
1592                 *error = EXIT_FDS;
1593                 return err;
1594         }
1595
1596         if (params->apply_permissions) {
1597
1598                 for (i = 0; i < _RLIMIT_MAX; i++) {
1599                         if (!context->rlimit[i])
1600                                 continue;
1601
1602                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1603                                 *error = EXIT_LIMITS;
1604                                 return -errno;
1605                         }
1606                 }
1607
1608                 if (context->capability_bounding_set_drop) {
1609                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1610                         if (err < 0) {
1611                                 *error = EXIT_CAPABILITIES;
1612                                 return err;
1613                         }
1614                 }
1615
1616                 if (context->user) {
1617                         err = enforce_user(context, uid);
1618                         if (err < 0) {
1619                                 *error = EXIT_USER;
1620                                 return err;
1621                         }
1622                 }
1623
1624                 /* PR_GET_SECUREBITS is not privileged, while
1625                  * PR_SET_SECUREBITS is. So to suppress
1626                  * potential EPERMs we'll try not to call
1627                  * PR_SET_SECUREBITS unless necessary. */
1628                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1629                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1630                                 *error = EXIT_SECUREBITS;
1631                                 return -errno;
1632                         }
1633
1634                 if (context->capabilities)
1635                         if (cap_set_proc(context->capabilities) < 0) {
1636                                 *error = EXIT_CAPABILITIES;
1637                                 return -errno;
1638                         }
1639
1640                 if (context->no_new_privileges)
1641                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1642                                 *error = EXIT_NO_NEW_PRIVILEGES;
1643                                 return -errno;
1644                         }
1645
1646 #ifdef HAVE_SECCOMP
1647                 if (context->address_families_whitelist ||
1648                     !set_isempty(context->address_families)) {
1649                         err = apply_address_families(context);
1650                         if (err < 0) {
1651                                 *error = EXIT_ADDRESS_FAMILIES;
1652                                 return err;
1653                         }
1654                 }
1655
1656                 if (context->syscall_whitelist ||
1657                     !set_isempty(context->syscall_filter) ||
1658                     !set_isempty(context->syscall_archs)) {
1659                         err = apply_seccomp(context);
1660                         if (err < 0) {
1661                                 *error = EXIT_SECCOMP;
1662                                 return err;
1663                         }
1664                 }
1665 #endif
1666
1667 #ifdef HAVE_SELINUX
1668                 if (context->selinux_context && use_selinux()) {
1669                         err = setexeccon(context->selinux_context);
1670                         if (err < 0 && !context->selinux_context_ignore) {
1671                                 *error = EXIT_SELINUX_CONTEXT;
1672                                 return err;
1673                         }
1674                 }
1675 #endif
1676
1677 #ifdef HAVE_APPARMOR
1678                 if (context->apparmor_profile && use_apparmor()) {
1679                         err = aa_change_onexec(context->apparmor_profile);
1680                         if (err < 0 && !context->apparmor_profile_ignore) {
1681                                 *error = EXIT_APPARMOR_PROFILE;
1682                                 return err;
1683                         }
1684                 }
1685 #endif
1686         }
1687
1688         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1689         if (err < 0) {
1690                 *error = EXIT_MEMORY;
1691                 return err;
1692         }
1693
1694         final_env = strv_env_merge(5,
1695                                    params->environment,
1696                                    our_env,
1697                                    context->environment,
1698                                    files_env,
1699                                    pam_env,
1700                                    NULL);
1701         if (!final_env) {
1702                 *error = EXIT_MEMORY;
1703                 return -ENOMEM;
1704         }
1705
1706         final_argv = replace_env_argv(argv, final_env);
1707         if (!final_argv) {
1708                 *error = EXIT_MEMORY;
1709                 return -ENOMEM;
1710         }
1711
1712         final_env = strv_env_clean(final_env);
1713
1714         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1715                 _cleanup_free_ char *line;
1716
1717                 line = exec_command_line(final_argv);
1718                 if (line) {
1719                         log_open();
1720                         log_struct_unit(LOG_DEBUG,
1721                                         params->unit_id,
1722                                         "EXECUTABLE=%s", command->path,
1723                                         "MESSAGE=Executing: %s", line,
1724                                         NULL);
1725                         log_close();
1726                 }
1727         }
1728         execve(command->path, final_argv, final_env);
1729         *error = EXIT_EXEC;
1730         return -errno;
1731 }
1732
1733 int exec_spawn(ExecCommand *command,
1734                const ExecContext *context,
1735                const ExecParameters *params,
1736                ExecRuntime *runtime,
1737                pid_t *ret) {
1738
1739         _cleanup_strv_free_ char **files_env = NULL;
1740         int *fds = NULL; unsigned n_fds = 0;
1741         char *line, **argv;
1742         int socket_fd;
1743         pid_t pid;
1744         int err;
1745
1746         assert(command);
1747         assert(context);
1748         assert(ret);
1749         assert(params);
1750         assert(params->fds || params->n_fds <= 0);
1751
1752         if (context->std_input == EXEC_INPUT_SOCKET ||
1753             context->std_output == EXEC_OUTPUT_SOCKET ||
1754             context->std_error == EXEC_OUTPUT_SOCKET) {
1755
1756                 if (params->n_fds != 1)
1757                         return -EINVAL;
1758
1759                 socket_fd = params->fds[0];
1760         } else {
1761                 socket_fd = -1;
1762                 fds = params->fds;
1763                 n_fds = params->n_fds;
1764         }
1765
1766         err = exec_context_load_environment(context, &files_env);
1767         if (err < 0) {
1768                 log_struct_unit(LOG_ERR,
1769                            params->unit_id,
1770                            "MESSAGE=Failed to load environment files: %s", strerror(-err),
1771                            "ERRNO=%d", -err,
1772                            NULL);
1773                 return err;
1774         }
1775
1776         argv = params->argv ?: command->argv;
1777
1778         line = exec_command_line(argv);
1779         if (!line)
1780                 return log_oom();
1781
1782         log_struct_unit(LOG_DEBUG,
1783                         params->unit_id,
1784                         "EXECUTABLE=%s", command->path,
1785                         "MESSAGE=About to execute: %s", line,
1786                         NULL);
1787         free(line);
1788
1789         pid = fork();
1790         if (pid < 0)
1791                 return -errno;
1792
1793         if (pid == 0) {
1794                 int r;
1795
1796                 err = exec_child(command,
1797                                  context,
1798                                  params,
1799                                  runtime,
1800                                  argv,
1801                                  socket_fd,
1802                                  fds, n_fds,
1803                                  files_env,
1804                                  &r);
1805                 if (r != 0) {
1806                         log_open();
1807                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1808                                    "EXECUTABLE=%s", command->path,
1809                                    "MESSAGE=Failed at step %s spawning %s: %s",
1810                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1811                                           command->path, strerror(-err),
1812                                    "ERRNO=%d", -err,
1813                                    NULL);
1814                         log_close();
1815                 }
1816
1817                 _exit(r);
1818         }
1819
1820         log_struct_unit(LOG_DEBUG,
1821                         params->unit_id,
1822                         "MESSAGE=Forked %s as "PID_FMT,
1823                         command->path, pid,
1824                         NULL);
1825
1826         /* We add the new process to the cgroup both in the child (so
1827          * that we can be sure that no user code is ever executed
1828          * outside of the cgroup) and in the parent (so that we can be
1829          * sure that when we kill the cgroup the process will be
1830          * killed too). */
1831         if (params->cgroup_path)
1832                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1833
1834         exec_status_start(&command->exec_status, pid);
1835
1836         *ret = pid;
1837         return 0;
1838 }
1839
1840 void exec_context_init(ExecContext *c) {
1841         assert(c);
1842
1843         c->umask = 0022;
1844         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1845         c->cpu_sched_policy = SCHED_OTHER;
1846         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1847         c->syslog_level_prefix = true;
1848         c->ignore_sigpipe = true;
1849         c->timer_slack_nsec = NSEC_INFINITY;
1850         c->personality = 0xffffffffUL;
1851         c->runtime_directory_mode = 0755;
1852 }
1853
1854 void exec_context_done(ExecContext *c) {
1855         unsigned l;
1856
1857         assert(c);
1858
1859         strv_free(c->environment);
1860         c->environment = NULL;
1861
1862         strv_free(c->environment_files);
1863         c->environment_files = NULL;
1864
1865         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1866                 free(c->rlimit[l]);
1867                 c->rlimit[l] = NULL;
1868         }
1869
1870         free(c->working_directory);
1871         c->working_directory = NULL;
1872         free(c->root_directory);
1873         c->root_directory = NULL;
1874
1875         free(c->tty_path);
1876         c->tty_path = NULL;
1877
1878         free(c->syslog_identifier);
1879         c->syslog_identifier = NULL;
1880
1881         free(c->user);
1882         c->user = NULL;
1883
1884         free(c->group);
1885         c->group = NULL;
1886
1887         strv_free(c->supplementary_groups);
1888         c->supplementary_groups = NULL;
1889
1890         free(c->pam_name);
1891         c->pam_name = NULL;
1892
1893         if (c->capabilities) {
1894                 cap_free(c->capabilities);
1895                 c->capabilities = NULL;
1896         }
1897
1898         strv_free(c->read_only_dirs);
1899         c->read_only_dirs = NULL;
1900
1901         strv_free(c->read_write_dirs);
1902         c->read_write_dirs = NULL;
1903
1904         strv_free(c->inaccessible_dirs);
1905         c->inaccessible_dirs = NULL;
1906
1907         if (c->cpuset)
1908                 CPU_FREE(c->cpuset);
1909
1910         free(c->utmp_id);
1911         c->utmp_id = NULL;
1912
1913         free(c->selinux_context);
1914         c->selinux_context = NULL;
1915
1916         free(c->apparmor_profile);
1917         c->apparmor_profile = NULL;
1918
1919         set_free(c->syscall_filter);
1920         c->syscall_filter = NULL;
1921
1922         set_free(c->syscall_archs);
1923         c->syscall_archs = NULL;
1924
1925         set_free(c->address_families);
1926         c->address_families = NULL;
1927
1928         strv_free(c->runtime_directory);
1929         c->runtime_directory = NULL;
1930
1931         bus_endpoint_free(c->bus_endpoint);
1932         c->bus_endpoint = NULL;
1933 }
1934
1935 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1936         char **i;
1937
1938         assert(c);
1939
1940         if (!runtime_prefix)
1941                 return 0;
1942
1943         STRV_FOREACH(i, c->runtime_directory) {
1944                 _cleanup_free_ char *p;
1945
1946                 p = strjoin(runtime_prefix, "/", *i, NULL);
1947                 if (!p)
1948                         return -ENOMEM;
1949
1950                 /* We execute this synchronously, since we need to be
1951                  * sure this is gone when we start the service
1952                  * next. */
1953                 rm_rf_dangerous(p, false, true, false);
1954         }
1955
1956         return 0;
1957 }
1958
1959 void exec_command_done(ExecCommand *c) {
1960         assert(c);
1961
1962         free(c->path);
1963         c->path = NULL;
1964
1965         strv_free(c->argv);
1966         c->argv = NULL;
1967 }
1968
1969 void exec_command_done_array(ExecCommand *c, unsigned n) {
1970         unsigned i;
1971
1972         for (i = 0; i < n; i++)
1973                 exec_command_done(c+i);
1974 }
1975
1976 void exec_command_free_list(ExecCommand *c) {
1977         ExecCommand *i;
1978
1979         while ((i = c)) {
1980                 LIST_REMOVE(command, c, i);
1981                 exec_command_done(i);
1982                 free(i);
1983         }
1984 }
1985
1986 void exec_command_free_array(ExecCommand **c, unsigned n) {
1987         unsigned i;
1988
1989         for (i = 0; i < n; i++) {
1990                 exec_command_free_list(c[i]);
1991                 c[i] = NULL;
1992         }
1993 }
1994
1995 int exec_context_load_environment(const ExecContext *c, char ***l) {
1996         char **i, **r = NULL;
1997
1998         assert(c);
1999         assert(l);
2000
2001         STRV_FOREACH(i, c->environment_files) {
2002                 char *fn;
2003                 int k;
2004                 bool ignore = false;
2005                 char **p;
2006                 _cleanup_globfree_ glob_t pglob = {};
2007                 int count, n;
2008
2009                 fn = *i;
2010
2011                 if (fn[0] == '-') {
2012                         ignore = true;
2013                         fn ++;
2014                 }
2015
2016                 if (!path_is_absolute(fn)) {
2017                         if (ignore)
2018                                 continue;
2019
2020                         strv_free(r);
2021                         return -EINVAL;
2022                 }
2023
2024                 /* Filename supports globbing, take all matching files */
2025                 errno = 0;
2026                 if (glob(fn, 0, NULL, &pglob) != 0) {
2027                         if (ignore)
2028                                 continue;
2029
2030                         strv_free(r);
2031                         return errno ? -errno : -EINVAL;
2032                 }
2033                 count = pglob.gl_pathc;
2034                 if (count == 0) {
2035                         if (ignore)
2036                                 continue;
2037
2038                         strv_free(r);
2039                         return -EINVAL;
2040                 }
2041                 for (n = 0; n < count; n++) {
2042                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2043                         if (k < 0) {
2044                                 if (ignore)
2045                                         continue;
2046
2047                                 strv_free(r);
2048                                 return k;
2049                         }
2050                         /* Log invalid environment variables with filename */
2051                         if (p)
2052                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2053
2054                         if (r == NULL)
2055                                 r = p;
2056                         else {
2057                                 char **m;
2058
2059                                 m = strv_env_merge(2, r, p);
2060                                 strv_free(r);
2061                                 strv_free(p);
2062                                 if (!m)
2063                                         return -ENOMEM;
2064
2065                                 r = m;
2066                         }
2067                 }
2068         }
2069
2070         *l = r;
2071
2072         return 0;
2073 }
2074
2075 static bool tty_may_match_dev_console(const char *tty) {
2076         _cleanup_free_ char *active = NULL;
2077        char *console;
2078
2079         if (startswith(tty, "/dev/"))
2080                 tty += 5;
2081
2082         /* trivial identity? */
2083         if (streq(tty, "console"))
2084                 return true;
2085
2086         console = resolve_dev_console(&active);
2087         /* if we could not resolve, assume it may */
2088         if (!console)
2089                 return true;
2090
2091         /* "tty0" means the active VC, so it may be the same sometimes */
2092         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2093 }
2094
2095 bool exec_context_may_touch_console(ExecContext *ec) {
2096         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2097                 is_terminal_input(ec->std_input) ||
2098                 is_terminal_output(ec->std_output) ||
2099                 is_terminal_output(ec->std_error)) &&
2100                tty_may_match_dev_console(tty_path(ec));
2101 }
2102
/* Writes every string of the vector to f, each preceded by a single
 * space. A NULL vector prints nothing. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        for (item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
2111
2112 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2113         char **e;
2114         unsigned i;
2115
2116         assert(c);
2117         assert(f);
2118
2119         prefix = strempty(prefix);
2120
2121         fprintf(f,
2122                 "%sUMask: %04o\n"
2123                 "%sWorkingDirectory: %s\n"
2124                 "%sRootDirectory: %s\n"
2125                 "%sNonBlocking: %s\n"
2126                 "%sPrivateTmp: %s\n"
2127                 "%sPrivateNetwork: %s\n"
2128                 "%sPrivateDevices: %s\n"
2129                 "%sProtectHome: %s\n"
2130                 "%sProtectSystem: %s\n"
2131                 "%sIgnoreSIGPIPE: %s\n",
2132                 prefix, c->umask,
2133                 prefix, c->working_directory ? c->working_directory : "/",
2134                 prefix, c->root_directory ? c->root_directory : "/",
2135                 prefix, yes_no(c->non_blocking),
2136                 prefix, yes_no(c->private_tmp),
2137                 prefix, yes_no(c->private_network),
2138                 prefix, yes_no(c->private_devices),
2139                 prefix, protect_home_to_string(c->protect_home),
2140                 prefix, protect_system_to_string(c->protect_system),
2141                 prefix, yes_no(c->ignore_sigpipe));
2142
2143         STRV_FOREACH(e, c->environment)
2144                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2145
2146         STRV_FOREACH(e, c->environment_files)
2147                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2148
2149         if (c->nice_set)
2150                 fprintf(f,
2151                         "%sNice: %i\n",
2152                         prefix, c->nice);
2153
2154         if (c->oom_score_adjust_set)
2155                 fprintf(f,
2156                         "%sOOMScoreAdjust: %i\n",
2157                         prefix, c->oom_score_adjust);
2158
2159         for (i = 0; i < RLIM_NLIMITS; i++)
2160                 if (c->rlimit[i])
2161                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2162                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2163
2164         if (c->ioprio_set) {
2165                 _cleanup_free_ char *class_str = NULL;
2166
2167                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2168                 fprintf(f,
2169                         "%sIOSchedulingClass: %s\n"
2170                         "%sIOPriority: %i\n",
2171                         prefix, strna(class_str),
2172                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2173         }
2174
2175         if (c->cpu_sched_set) {
2176                 _cleanup_free_ char *policy_str = NULL;
2177
2178                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2179                 fprintf(f,
2180                         "%sCPUSchedulingPolicy: %s\n"
2181                         "%sCPUSchedulingPriority: %i\n"
2182                         "%sCPUSchedulingResetOnFork: %s\n",
2183                         prefix, strna(policy_str),
2184                         prefix, c->cpu_sched_priority,
2185                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2186         }
2187
2188         if (c->cpuset) {
2189                 fprintf(f, "%sCPUAffinity:", prefix);
2190                 for (i = 0; i < c->cpuset_ncpus; i++)
2191                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2192                                 fprintf(f, " %u", i);
2193                 fputs("\n", f);
2194         }
2195
2196         if (c->timer_slack_nsec != NSEC_INFINITY)
2197                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2198
2199         fprintf(f,
2200                 "%sStandardInput: %s\n"
2201                 "%sStandardOutput: %s\n"
2202                 "%sStandardError: %s\n",
2203                 prefix, exec_input_to_string(c->std_input),
2204                 prefix, exec_output_to_string(c->std_output),
2205                 prefix, exec_output_to_string(c->std_error));
2206
2207         if (c->tty_path)
2208                 fprintf(f,
2209                         "%sTTYPath: %s\n"
2210                         "%sTTYReset: %s\n"
2211                         "%sTTYVHangup: %s\n"
2212                         "%sTTYVTDisallocate: %s\n",
2213                         prefix, c->tty_path,
2214                         prefix, yes_no(c->tty_reset),
2215                         prefix, yes_no(c->tty_vhangup),
2216                         prefix, yes_no(c->tty_vt_disallocate));
2217
2218         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2219             c->std_output == EXEC_OUTPUT_KMSG ||
2220             c->std_output == EXEC_OUTPUT_JOURNAL ||
2221             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2222             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2223             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2224             c->std_error == EXEC_OUTPUT_SYSLOG ||
2225             c->std_error == EXEC_OUTPUT_KMSG ||
2226             c->std_error == EXEC_OUTPUT_JOURNAL ||
2227             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2228             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2229             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2230
2231                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2232
2233                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2234                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2235
2236                 fprintf(f,
2237                         "%sSyslogFacility: %s\n"
2238                         "%sSyslogLevel: %s\n",
2239                         prefix, strna(fac_str),
2240                         prefix, strna(lvl_str));
2241         }
2242
2243         if (c->capabilities) {
2244                 _cleanup_cap_free_charp_ char *t;
2245
2246                 t = cap_to_text(c->capabilities, NULL);
2247                 if (t)
2248                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2249         }
2250
2251         if (c->secure_bits)
2252                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2253                         prefix,
2254                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2255                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2256                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2257                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2258                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2259                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2260
2261         if (c->capability_bounding_set_drop) {
2262                 unsigned long l;
2263                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2264
2265                 for (l = 0; l <= cap_last_cap(); l++)
2266                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2267                                 _cleanup_cap_free_charp_ char *t;
2268
2269                                 t = cap_to_name(l);
2270                                 if (t)
2271                                         fprintf(f, " %s", t);
2272                         }
2273
2274                 fputs("\n", f);
2275         }
2276
2277         if (c->user)
2278                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2279         if (c->group)
2280                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2281
2282         if (strv_length(c->supplementary_groups) > 0) {
2283                 fprintf(f, "%sSupplementaryGroups:", prefix);
2284                 strv_fprintf(f, c->supplementary_groups);
2285                 fputs("\n", f);
2286         }
2287
2288         if (c->pam_name)
2289                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2290
2291         if (strv_length(c->read_write_dirs) > 0) {
2292                 fprintf(f, "%sReadWriteDirs:", prefix);
2293                 strv_fprintf(f, c->read_write_dirs);
2294                 fputs("\n", f);
2295         }
2296
2297         if (strv_length(c->read_only_dirs) > 0) {
2298                 fprintf(f, "%sReadOnlyDirs:", prefix);
2299                 strv_fprintf(f, c->read_only_dirs);
2300                 fputs("\n", f);
2301         }
2302
2303         if (strv_length(c->inaccessible_dirs) > 0) {
2304                 fprintf(f, "%sInaccessibleDirs:", prefix);
2305                 strv_fprintf(f, c->inaccessible_dirs);
2306                 fputs("\n", f);
2307         }
2308
2309         if (c->utmp_id)
2310                 fprintf(f,
2311                         "%sUtmpIdentifier: %s\n",
2312                         prefix, c->utmp_id);
2313
2314         if (c->selinux_context)
2315                 fprintf(f,
2316                         "%sSELinuxContext: %s%s\n",
2317                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2318
2319         if (c->personality != 0xffffffffUL)
2320                 fprintf(f,
2321                         "%sPersonality: %s\n",
2322                         prefix, strna(personality_to_string(c->personality)));
2323
2324         if (c->syscall_filter) {
2325 #ifdef HAVE_SECCOMP
2326                 Iterator j;
2327                 void *id;
2328                 bool first = true;
2329 #endif
2330
2331                 fprintf(f,
2332                         "%sSystemCallFilter: ",
2333                         prefix);
2334
2335                 if (!c->syscall_whitelist)
2336                         fputc('~', f);
2337
2338 #ifdef HAVE_SECCOMP
2339                 SET_FOREACH(id, c->syscall_filter, j) {
2340                         _cleanup_free_ char *name = NULL;
2341
2342                         if (first)
2343                                 first = false;
2344                         else
2345                                 fputc(' ', f);
2346
2347                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2348                         fputs(strna(name), f);
2349                 }
2350 #endif
2351
2352                 fputc('\n', f);
2353         }
2354
2355         if (c->syscall_archs) {
2356 #ifdef HAVE_SECCOMP
2357                 Iterator j;
2358                 void *id;
2359 #endif
2360
2361                 fprintf(f,
2362                         "%sSystemCallArchitectures:",
2363                         prefix);
2364
2365 #ifdef HAVE_SECCOMP
2366                 SET_FOREACH(id, c->syscall_archs, j)
2367                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2368 #endif
2369                 fputc('\n', f);
2370         }
2371
2372         if (c->syscall_errno != 0)
2373                 fprintf(f,
2374                         "%sSystemCallErrorNumber: %s\n",
2375                         prefix, strna(errno_to_name(c->syscall_errno)));
2376
2377         if (c->apparmor_profile)
2378                 fprintf(f,
2379                         "%sAppArmorProfile: %s%s\n",
2380                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2381 }
2382
2383 void exec_status_start(ExecStatus *s, pid_t pid) {
2384         assert(s);
2385
2386         zero(*s);
2387         s->pid = pid;
2388         dual_timestamp_get(&s->start_timestamp);
2389 }
2390
2391 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2392         assert(s);
2393
2394         if (s->pid && s->pid != pid)
2395                 zero(*s);
2396
2397         s->pid = pid;
2398         dual_timestamp_get(&s->exit_timestamp);
2399
2400         s->code = code;
2401         s->status = status;
2402
2403         if (context) {
2404                 if (context->utmp_id)
2405                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2406
2407                 exec_context_tty_reset(context);
2408         }
2409 }
2410
2411 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2412         char buf[FORMAT_TIMESTAMP_MAX];
2413
2414         assert(s);
2415         assert(f);
2416
2417         if (s->pid <= 0)
2418                 return;
2419
2420         prefix = strempty(prefix);
2421
2422         fprintf(f,
2423                 "%sPID: "PID_FMT"\n",
2424                 prefix, s->pid);
2425
2426         if (s->start_timestamp.realtime > 0)
2427                 fprintf(f,
2428                         "%sStart Timestamp: %s\n",
2429                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2430
2431         if (s->exit_timestamp.realtime > 0)
2432                 fprintf(f,
2433                         "%sExit Timestamp: %s\n"
2434                         "%sExit Code: %s\n"
2435                         "%sExit Status: %i\n",
2436                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2437                         prefix, sigchld_code_to_string(s->code),
2438                         prefix, s->status);
2439 }
2440
2441 char *exec_command_line(char **argv) {
2442         size_t k;
2443         char *n, *p, **a;
2444         bool first = true;
2445
2446         assert(argv);
2447
2448         k = 1;
2449         STRV_FOREACH(a, argv)
2450                 k += strlen(*a)+3;
2451
2452         if (!(n = new(char, k)))
2453                 return NULL;
2454
2455         p = n;
2456         STRV_FOREACH(a, argv) {
2457
2458                 if (!first)
2459                         *(p++) = ' ';
2460                 else
2461                         first = false;
2462
2463                 if (strpbrk(*a, WHITESPACE)) {
2464                         *(p++) = '\'';
2465                         p = stpcpy(p, *a);
2466                         *(p++) = '\'';
2467                 } else
2468                         p = stpcpy(p, *a);
2469
2470         }
2471
2472         *p = 0;
2473
2474         /* FIXME: this doesn't really handle arguments that have
2475          * spaces and ticks in them */
2476
2477         return n;
2478 }
2479
2480 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2481         _cleanup_free_ char *cmd = NULL;
2482         const char *prefix2;
2483
2484         assert(c);
2485         assert(f);
2486
2487         prefix = strempty(prefix);
2488         prefix2 = strappenda(prefix, "\t");
2489
2490         cmd = exec_command_line(c->argv);
2491         fprintf(f,
2492                 "%sCommand Line: %s\n",
2493                 prefix, cmd ? cmd : strerror(ENOMEM));
2494
2495         exec_status_dump(&c->exec_status, f, prefix2);
2496 }
2497
2498 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2499         assert(f);
2500
2501         prefix = strempty(prefix);
2502
2503         LIST_FOREACH(command, c, c)
2504                 exec_command_dump(c, f, prefix);
2505 }
2506
2507 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2508         ExecCommand *end;
2509
2510         assert(l);
2511         assert(e);
2512
2513         if (*l) {
2514                 /* It's kind of important, that we keep the order here */
2515                 LIST_FIND_TAIL(command, *l, end);
2516                 LIST_INSERT_AFTER(command, *l, end, e);
2517         } else
2518               *l = e;
2519 }
2520
2521 int exec_command_set(ExecCommand *c, const char *path, ...) {
2522         va_list ap;
2523         char **l, *p;
2524
2525         assert(c);
2526         assert(path);
2527
2528         va_start(ap, path);
2529         l = strv_new_ap(path, ap);
2530         va_end(ap);
2531
2532         if (!l)
2533                 return -ENOMEM;
2534
2535         p = strdup(path);
2536         if (!p) {
2537                 strv_free(l);
2538                 return -ENOMEM;
2539         }
2540
2541         free(c->path);
2542         c->path = p;
2543
2544         strv_free(c->argv);
2545         c->argv = l;
2546
2547         return 0;
2548 }
2549
2550 static int exec_runtime_allocate(ExecRuntime **rt) {
2551
2552         if (*rt)
2553                 return 0;
2554
2555         *rt = new0(ExecRuntime, 1);
2556         if (!*rt)
2557                 return -ENOMEM;
2558
2559         (*rt)->n_ref = 1;
2560         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2561
2562         return 0;
2563 }
2564
2565 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2566         int r;
2567
2568         assert(rt);
2569         assert(c);
2570         assert(id);
2571
2572         if (*rt)
2573                 return 1;
2574
2575         if (!c->private_network && !c->private_tmp)
2576                 return 0;
2577
2578         r = exec_runtime_allocate(rt);
2579         if (r < 0)
2580                 return r;
2581
2582         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2583                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2584                         return -errno;
2585         }
2586
2587         if (c->private_tmp && !(*rt)->tmp_dir) {
2588                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2589                 if (r < 0)
2590                         return r;
2591         }
2592
2593         return 1;
2594 }
2595
2596 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2597         assert(r);
2598         assert(r->n_ref > 0);
2599
2600         r->n_ref++;
2601         return r;
2602 }
2603
2604 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2605
2606         if (!r)
2607                 return NULL;
2608
2609         assert(r->n_ref > 0);
2610
2611         r->n_ref--;
2612         if (r->n_ref <= 0) {
2613                 free(r->tmp_dir);
2614                 free(r->var_tmp_dir);
2615                 safe_close_pair(r->netns_storage_socket);
2616                 free(r);
2617         }
2618
2619         return NULL;
2620 }
2621
2622 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2623         assert(u);
2624         assert(f);
2625         assert(fds);
2626
2627         if (!rt)
2628                 return 0;
2629
2630         if (rt->tmp_dir)
2631                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2632
2633         if (rt->var_tmp_dir)
2634                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2635
2636         if (rt->netns_storage_socket[0] >= 0) {
2637                 int copy;
2638
2639                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2640                 if (copy < 0)
2641                         return copy;
2642
2643                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2644         }
2645
2646         if (rt->netns_storage_socket[1] >= 0) {
2647                 int copy;
2648
2649                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2650                 if (copy < 0)
2651                         return copy;
2652
2653                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2654         }
2655
2656         return 0;
2657 }
2658
2659 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2660         int r;
2661
2662         assert(rt);
2663         assert(key);
2664         assert(value);
2665
2666         if (streq(key, "tmp-dir")) {
2667                 char *copy;
2668
2669                 r = exec_runtime_allocate(rt);
2670                 if (r < 0)
2671                         return r;
2672
2673                 copy = strdup(value);
2674                 if (!copy)
2675                         return log_oom();
2676
2677                 free((*rt)->tmp_dir);
2678                 (*rt)->tmp_dir = copy;
2679
2680         } else if (streq(key, "var-tmp-dir")) {
2681                 char *copy;
2682
2683                 r = exec_runtime_allocate(rt);
2684                 if (r < 0)
2685                         return r;
2686
2687                 copy = strdup(value);
2688                 if (!copy)
2689                         return log_oom();
2690
2691                 free((*rt)->var_tmp_dir);
2692                 (*rt)->var_tmp_dir = copy;
2693
2694         } else if (streq(key, "netns-socket-0")) {
2695                 int fd;
2696
2697                 r = exec_runtime_allocate(rt);
2698                 if (r < 0)
2699                         return r;
2700
2701                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2702                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2703                 else {
2704                         safe_close((*rt)->netns_storage_socket[0]);
2705                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2706                 }
2707         } else if (streq(key, "netns-socket-1")) {
2708                 int fd;
2709
2710                 r = exec_runtime_allocate(rt);
2711                 if (r < 0)
2712                         return r;
2713
2714                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2715                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2716                 else {
2717                         safe_close((*rt)->netns_storage_socket[1]);
2718                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2719                 }
2720         } else
2721                 return 0;
2722
2723         return 1;
2724 }
2725
/* Thread entry point used to remove a temporary directory in the background.
 *
 * p is a heap-allocated path string; this function takes it over and
 * releases it with free() once rm_rf_dangerous() has returned. Always
 * returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);

        /* The string was handed to us by the spawner, so we are the ones
         * who have to free it. */
        free(dir);

        return NULL;
}
2732
2733 void exec_runtime_destroy(ExecRuntime *rt) {
2734         int r;
2735
2736         if (!rt)
2737                 return;
2738
2739         /* If there are multiple users of this, let's leave the stuff around */
2740         if (rt->n_ref > 1)
2741                 return;
2742
2743         if (rt->tmp_dir) {
2744                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2745
2746                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2747                 if (r < 0) {
2748                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2749                         free(rt->tmp_dir);
2750                 }
2751
2752                 rt->tmp_dir = NULL;
2753         }
2754
2755         if (rt->var_tmp_dir) {
2756                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2757
2758                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2759                 if (r < 0) {
2760                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2761                         free(rt->var_tmp_dir);
2762                 }
2763
2764                 rt->var_tmp_dir = NULL;
2765         }
2766
2767         safe_close_pair(rt->netns_storage_socket);
2768 }
2769
2770 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2771         [EXEC_INPUT_NULL] = "null",
2772         [EXEC_INPUT_TTY] = "tty",
2773         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2774         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2775         [EXEC_INPUT_SOCKET] = "socket"
2776 };
2777
2778 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2779
2780 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2781         [EXEC_OUTPUT_INHERIT] = "inherit",
2782         [EXEC_OUTPUT_NULL] = "null",
2783         [EXEC_OUTPUT_TTY] = "tty",
2784         [EXEC_OUTPUT_SYSLOG] = "syslog",
2785         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2786         [EXEC_OUTPUT_KMSG] = "kmsg",
2787         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2788         [EXEC_OUTPUT_JOURNAL] = "journal",
2789         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2790         [EXEC_OUTPUT_SOCKET] = "socket"
2791 };
2792
2793 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);