chiark / gitweb /
bus-proxyd: explicitly address messages to unique and well-known name
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "bus-kernel.h"
87 #include "label.h"
88
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92
93 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
94 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
95
96 /* This assumes there is a 'tty' group */
97 #define TTY_MODE 0620
98
99 #define SNDBUF_SIZE (8*1024*1024)
100
/* Moves the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. the array becomes a contiguous range starting right
 * after stdin/stdout/stderr.
 *
 * Note that the fds array is modified in place!
 *
 * Returns 0 on success, or a negative errno-style error if duplicating
 * a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If
                         * we got something else the slot is still occupied,
                         * so remember the first such index and do another
                         * pass starting from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
144
/* Adjusts the flags of all passed fds: O_NONBLOCK is set or cleared
 * according to 'nonblock', and FD_CLOEXEC is always cleared.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be passed on to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
171
172 _pure_ static const char *tty_path(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_path)
176                 return context->tty_path;
177
178         return "/dev/console";
179 }
180
181 static void exec_context_tty_reset(const ExecContext *context) {
182         assert(context);
183
184         if (context->tty_vhangup)
185                 terminal_vhangup(tty_path(context));
186
187         if (context->tty_reset)
188                 reset_terminal(tty_path(context));
189
190         if (context->tty_vt_disallocate && context->tty_path)
191                 vt_disallocate(context->tty_path);
192 }
193
194 static bool is_terminal_output(ExecOutput o) {
195         return
196                 o == EXEC_OUTPUT_TTY ||
197                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 }
201
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes
 * it available as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno-style error otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right fd number right away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);

        return result;
}
219
220 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
221         int fd, r;
222         union sockaddr_union sa = {
223                 .un.sun_family = AF_UNIX,
224                 .un.sun_path = "/run/systemd/journal/stdout",
225         };
226
227         assert(context);
228         assert(output < _EXEC_OUTPUT_MAX);
229         assert(ident);
230         assert(nfd >= 0);
231
232         fd = socket(AF_UNIX, SOCK_STREAM, 0);
233         if (fd < 0)
234                 return -errno;
235
236         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
237         if (r < 0) {
238                 safe_close(fd);
239                 return -errno;
240         }
241
242         if (shutdown(fd, SHUT_RD) < 0) {
243                 safe_close(fd);
244                 return -errno;
245         }
246
247         fd_inc_sndbuf(fd, SNDBUF_SIZE);
248
249         dprintf(fd,
250                 "%s\n"
251                 "%s\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n",
257                 context->syslog_identifier ? context->syslog_identifier : ident,
258                 unit_id,
259                 context->syslog_priority,
260                 !!context->syslog_level_prefix,
261                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
262                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
263                 is_terminal_output(output));
264
265         if (fd != nfd) {
266                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
267                 safe_close(fd);
268         } else
269                 r = nfd;
270
271         return r;
272 }
/* Opens the terminal at 'path' (O_NOCTTY is added to 'mode', which is
 * passed through to open_terminal()) and makes it available as file
 * descriptor 'nfd'.
 *
 * Returns nfd on success, a negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(term_fd);

        return result;
}
290
291 static bool is_terminal_input(ExecInput i) {
292         return
293                 i == EXEC_INPUT_TTY ||
294                 i == EXEC_INPUT_TTY_FORCE ||
295                 i == EXEC_INPUT_TTY_FAIL;
296 }
297
298 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
299
300         if (is_terminal_input(std_input) && !apply_tty_stdin)
301                 return EXEC_INPUT_NULL;
302
303         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
304                 return EXEC_INPUT_NULL;
305
306         return std_input;
307 }
308
309 static int fixup_output(ExecOutput std_output, int socket_fd) {
310
311         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
312                 return EXEC_OUTPUT_INHERIT;
313
314         return std_output;
315 }
316
317 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
318         ExecInput i;
319
320         assert(context);
321
322         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
323
324         switch (i) {
325
326         case EXEC_INPUT_NULL:
327                 return open_null_as(O_RDONLY, STDIN_FILENO);
328
329         case EXEC_INPUT_TTY:
330         case EXEC_INPUT_TTY_FORCE:
331         case EXEC_INPUT_TTY_FAIL: {
332                 int fd, r;
333
334                 fd = acquire_terminal(tty_path(context),
335                                       i == EXEC_INPUT_TTY_FAIL,
336                                       i == EXEC_INPUT_TTY_FORCE,
337                                       false,
338                                       USEC_INFINITY);
339                 if (fd < 0)
340                         return fd;
341
342                 if (fd != STDIN_FILENO) {
343                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
344                         safe_close(fd);
345                 } else
346                         r = STDIN_FILENO;
347
348                 return r;
349         }
350
351         case EXEC_INPUT_SOCKET:
352                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
353
354         default:
355                 assert_not_reached("Unknown input type");
356         }
357 }
358
359 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
360         ExecOutput o;
361         ExecInput i;
362         int r;
363
364         assert(context);
365         assert(ident);
366
367         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
368         o = fixup_output(context->std_output, socket_fd);
369
370         if (fileno == STDERR_FILENO) {
371                 ExecOutput e;
372                 e = fixup_output(context->std_error, socket_fd);
373
374                 /* This expects the input and output are already set up */
375
376                 /* Don't change the stderr file descriptor if we inherit all
377                  * the way and are not on a tty */
378                 if (e == EXEC_OUTPUT_INHERIT &&
379                     o == EXEC_OUTPUT_INHERIT &&
380                     i == EXEC_INPUT_NULL &&
381                     !is_terminal_input(context->std_input) &&
382                     getppid () != 1)
383                         return fileno;
384
385                 /* Duplicate from stdout if possible */
386                 if (e == o || e == EXEC_OUTPUT_INHERIT)
387                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
388
389                 o = e;
390
391         } else if (o == EXEC_OUTPUT_INHERIT) {
392                 /* If input got downgraded, inherit the original value */
393                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
394                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
395
396                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
397                 if (i != EXEC_INPUT_NULL)
398                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
399
400                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
401                 if (getppid() != 1)
402                         return fileno;
403
404                 /* We need to open /dev/null here anew, to get the right access mode. */
405                 return open_null_as(O_WRONLY, fileno);
406         }
407
408         switch (o) {
409
410         case EXEC_OUTPUT_NULL:
411                 return open_null_as(O_WRONLY, fileno);
412
413         case EXEC_OUTPUT_TTY:
414                 if (is_terminal_input(i))
415                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
416
417                 /* We don't reset the terminal if this is just about output */
418                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
419
420         case EXEC_OUTPUT_SYSLOG:
421         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
422         case EXEC_OUTPUT_KMSG:
423         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
424         case EXEC_OUTPUT_JOURNAL:
425         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
426                 r = connect_logger_as(context, o, ident, unit_id, fileno);
427                 if (r < 0) {
428                         log_struct_unit(LOG_CRIT, unit_id,
429                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
430                                 fileno == STDOUT_FILENO ? "out" : "err",
431                                 unit_id, strerror(-r),
432                                 "ERRNO=%d", -r,
433                                 NULL);
434                         r = open_null_as(O_WRONLY, fileno);
435                 }
436                 return r;
437
438         case EXEC_OUTPUT_SOCKET:
439                 assert(socket_fd >= 0);
440                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
441
442         default:
443                 assert_not_reached("Unknown error type");
444         }
445 }
446
447 static int chown_terminal(int fd, uid_t uid) {
448         struct stat st;
449
450         assert(fd >= 0);
451
452         /* This might fail. What matters are the results. */
453         (void) fchown(fd, uid, -1);
454         (void) fchmod(fd, TTY_MODE);
455
456         if (fstat(fd, &st) < 0)
457                 return -errno;
458
459         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
460                 return -EPERM;
461
462         return 0;
463 }
464
465 static int setup_confirm_stdio(int *_saved_stdin,
466                                int *_saved_stdout) {
467         int fd = -1, saved_stdin, saved_stdout = -1, r;
468
469         assert(_saved_stdin);
470         assert(_saved_stdout);
471
472         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
473         if (saved_stdin < 0)
474                 return -errno;
475
476         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
477         if (saved_stdout < 0) {
478                 r = errno;
479                 goto fail;
480         }
481
482         fd = acquire_terminal(
483                         "/dev/console",
484                         false,
485                         false,
486                         false,
487                         DEFAULT_CONFIRM_USEC);
488         if (fd < 0) {
489                 r = fd;
490                 goto fail;
491         }
492
493         r = chown_terminal(fd, getuid());
494         if (r < 0)
495                 goto fail;
496
497         if (dup2(fd, STDIN_FILENO) < 0) {
498                 r = -errno;
499                 goto fail;
500         }
501
502         if (dup2(fd, STDOUT_FILENO) < 0) {
503                 r = -errno;
504                 goto fail;
505         }
506
507         if (fd >= 2)
508                 safe_close(fd);
509
510         *_saved_stdin = saved_stdin;
511         *_saved_stdout = saved_stdout;
512
513         return 0;
514
515 fail:
516         safe_close(saved_stdout);
517         safe_close(saved_stdin);
518         safe_close(fd);
519
520         return r;
521 }
522
523 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
524         _cleanup_close_ int fd = -1;
525         va_list ap;
526
527         assert(format);
528
529         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
530         if (fd < 0)
531                 return fd;
532
533         va_start(ap, format);
534         vdprintf(fd, format, ap);
535         va_end(ap);
536
537         return 0;
538 }
539
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved stdin/stdout back into place (for those that are valid) and
 * closes the saved fds.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
563
564 static int ask_for_confirmation(char *response, char **argv) {
565         int saved_stdout = -1, saved_stdin = -1, r;
566         _cleanup_free_ char *line = NULL;
567
568         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
569         if (r < 0)
570                 return r;
571
572         line = exec_command_line(argv);
573         if (!line)
574                 return -ENOMEM;
575
576         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
577
578         restore_confirm_stdio(&saved_stdin, &saved_stdout);
579
580         return r;
581 }
582
583 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
584         bool keep_groups = false;
585         int r;
586
587         assert(context);
588
589         /* Lookup and set GID and supplementary group list. Here too
590          * we avoid NSS lookups for gid=0. */
591
592         if (context->group || username) {
593
594                 if (context->group) {
595                         const char *g = context->group;
596
597                         if ((r = get_group_creds(&g, &gid)) < 0)
598                                 return r;
599                 }
600
601                 /* First step, initialize groups from /etc/groups */
602                 if (username && gid != 0) {
603                         if (initgroups(username, gid) < 0)
604                                 return -errno;
605
606                         keep_groups = true;
607                 }
608
609                 /* Second step, set our gids */
610                 if (setresgid(gid, gid, gid) < 0)
611                         return -errno;
612         }
613
614         if (context->supplementary_groups) {
615                 int ngroups_max, k;
616                 gid_t *gids;
617                 char **i;
618
619                 /* Final step, initialize any manually set supplementary groups */
620                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
621
622                 if (!(gids = new(gid_t, ngroups_max)))
623                         return -ENOMEM;
624
625                 if (keep_groups) {
626                         if ((k = getgroups(ngroups_max, gids)) < 0) {
627                                 free(gids);
628                                 return -errno;
629                         }
630                 } else
631                         k = 0;
632
633                 STRV_FOREACH(i, context->supplementary_groups) {
634                         const char *g;
635
636                         if (k >= ngroups_max) {
637                                 free(gids);
638                                 return -E2BIG;
639                         }
640
641                         g = *i;
642                         r = get_group_creds(&g, gids+k);
643                         if (r < 0) {
644                                 free(gids);
645                                 return r;
646                         }
647
648                         k++;
649                 }
650
651                 if (setgroups(k, gids) < 0) {
652                         free(gids);
653                         return -errno;
654                 }
655
656                 free(gids);
657         }
658
659         return 0;
660 }
661
662 static int enforce_user(const ExecContext *context, uid_t uid) {
663         assert(context);
664
665         /* Sets (but doesn't lookup) the uid and make sure we keep the
666          * capabilities while doing so. */
667
668         if (context->capabilities) {
669                 _cleanup_cap_free_ cap_t d = NULL;
670                 static const cap_value_t bits[] = {
671                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
672                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
673                 };
674
675                 /* First step: If we need to keep capabilities but
676                  * drop privileges we need to make sure we keep our
677                  * caps, while we drop privileges. */
678                 if (uid != 0) {
679                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
680
681                         if (prctl(PR_GET_SECUREBITS) != sb)
682                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
683                                         return -errno;
684                 }
685
686                 /* Second step: set the capabilities. This will reduce
687                  * the capabilities to the minimum we need. */
688
689                 d = cap_dup(context->capabilities);
690                 if (!d)
691                         return -errno;
692
693                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
694                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
695                         return -errno;
696
697                 if (cap_set_proc(d) < 0)
698                         return -errno;
699         }
700
701         /* Third step: actually set the uids */
702         if (setresuid(uid, uid, uid) < 0)
703                 return -errno;
704
705         /* At this point we should have all necessary capabilities but
706            are otherwise a normal user. However, the caps might got
707            corrupted due to the setresuid() so we need clean them up
708            later. This is done outside of this call. */
709
710         return 0;
711 }
712
713 #ifdef HAVE_PAM
714
715 static int null_conv(
716                 int num_msg,
717                 const struct pam_message **msg,
718                 struct pam_response **resp,
719                 void *appdata_ptr) {
720
721         /* We don't support conversations */
722
723         return PAM_CONV_ERR;
724 }
725
726 static int setup_pam(
727                 const char *name,
728                 const char *user,
729                 uid_t uid,
730                 const char *tty,
731                 char ***pam_env,
732                 int fds[], unsigned n_fds) {
733
734         static const struct pam_conv conv = {
735                 .conv = null_conv,
736                 .appdata_ptr = NULL
737         };
738
739         pam_handle_t *handle = NULL;
740         sigset_t ss, old_ss;
741         int pam_code = PAM_SUCCESS;
742         int err;
743         char **e = NULL;
744         bool close_session = false;
745         pid_t pam_pid = 0, parent_pid;
746         int flags = 0;
747
748         assert(name);
749         assert(user);
750         assert(pam_env);
751
752         /* We set up PAM in the parent process, then fork. The child
753          * will then stay around until killed via PR_GET_PDEATHSIG or
754          * systemd via the cgroup logic. It will then remove the PAM
755          * session again. The parent process will exec() the actual
756          * daemon. We do things this way to ensure that the main PID
757          * of the daemon is the one we initially fork()ed. */
758
759         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
760                 flags |= PAM_SILENT;
761
762         pam_code = pam_start(name, user, &conv, &handle);
763         if (pam_code != PAM_SUCCESS) {
764                 handle = NULL;
765                 goto fail;
766         }
767
768         if (tty) {
769                 pam_code = pam_set_item(handle, PAM_TTY, tty);
770                 if (pam_code != PAM_SUCCESS)
771                         goto fail;
772         }
773
774         pam_code = pam_acct_mgmt(handle, flags);
775         if (pam_code != PAM_SUCCESS)
776                 goto fail;
777
778         pam_code = pam_open_session(handle, flags);
779         if (pam_code != PAM_SUCCESS)
780                 goto fail;
781
782         close_session = true;
783
784         e = pam_getenvlist(handle);
785         if (!e) {
786                 pam_code = PAM_BUF_ERR;
787                 goto fail;
788         }
789
790         /* Block SIGTERM, so that we know that it won't get lost in
791          * the child */
792         if (sigemptyset(&ss) < 0 ||
793             sigaddset(&ss, SIGTERM) < 0 ||
794             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
795                 goto fail;
796
797         parent_pid = getpid();
798
799         pam_pid = fork();
800         if (pam_pid < 0)
801                 goto fail;
802
803         if (pam_pid == 0) {
804                 int sig;
805                 int r = EXIT_PAM;
806
807                 /* The child's job is to reset the PAM session on
808                  * termination */
809
810                 /* This string must fit in 10 chars (i.e. the length
811                  * of "/sbin/init"), to look pretty in /bin/ps */
812                 rename_process("(sd-pam)");
813
814                 /* Make sure we don't keep open the passed fds in this
815                 child. We assume that otherwise only those fds are
816                 open here that have been opened by PAM. */
817                 close_many(fds, n_fds);
818
819                 /* Drop privileges - we don't need any to pam_close_session
820                  * and this will make PR_SET_PDEATHSIG work in most cases.
821                  * If this fails, ignore the error - but expect sd-pam threads
822                  * to fail to exit normally */
823                 if (setresuid(uid, uid, uid) < 0)
824                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
825
826                 /* Wait until our parent died. This will only work if
827                  * the above setresuid() succeeds, otherwise the kernel
828                  * will not allow unprivileged parents kill their privileged
829                  * children this way. We rely on the control groups kill logic
830                  * to do the rest for us. */
831                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
832                         goto child_finish;
833
834                 /* Check if our parent process might already have
835                  * died? */
836                 if (getppid() == parent_pid) {
837                         for (;;) {
838                                 if (sigwait(&ss, &sig) < 0) {
839                                         if (errno == EINTR)
840                                                 continue;
841
842                                         goto child_finish;
843                                 }
844
845                                 assert(sig == SIGTERM);
846                                 break;
847                         }
848                 }
849
850                 /* If our parent died we'll end the session */
851                 if (getppid() != parent_pid) {
852                         pam_code = pam_close_session(handle, flags);
853                         if (pam_code != PAM_SUCCESS)
854                                 goto child_finish;
855                 }
856
857                 r = 0;
858
859         child_finish:
860                 pam_end(handle, pam_code | flags);
861                 _exit(r);
862         }
863
864         /* If the child was forked off successfully it will do all the
865          * cleanups, so forget about the handle here. */
866         handle = NULL;
867
868         /* Unblock SIGTERM again in the parent */
869         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
870                 goto fail;
871
872         /* We close the log explicitly here, since the PAM modules
873          * might have opened it, but we don't want this fd around. */
874         closelog();
875
876         *pam_env = e;
877         e = NULL;
878
879         return 0;
880
881 fail:
882         if (pam_code != PAM_SUCCESS) {
883                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
884                 err = -EPERM;  /* PAM errors do not map to errno */
885         } else {
886                 log_error("PAM failed: %m");
887                 err = -errno;
888         }
889
890         if (handle) {
891                 if (close_session)
892                         pam_code = pam_close_session(handle, flags);
893
894                 pam_end(handle, pam_code | flags);
895         }
896
897         strv_free(e);
898
899         closelog();
900
901         if (pam_pid > 1) {
902                 kill(pam_pid, SIGTERM);
903                 kill(pam_pid, SIGCONT);
904         }
905
906         return err;
907 }
908 #endif
909
/* Renames the calling process to "(<name>)", where <name> is the last
 * (at most) 8 characters of the basename of 'path'. If the basename is
 * empty the process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps, plus the NUL byte. */
        char process_name[11];
        const char *tail;
        size_t n;
        char *w;

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        /* The end of the name is usually the more interesting part,
         * since the beginning might just be "systemd-". Hence, cut off
         * at the front if the name is too long. */
        n = strlen(tail);
        if (n > 8) {
                tail += n - 8;
                n = 8;
        }

        w = process_name;
        *w++ = '(';
        memcpy(w, tail, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(process_name);
}
940
941 #ifdef HAVE_SECCOMP
942
943 static int apply_seccomp(const ExecContext *c) {
944         uint32_t negative_action, action;
945         scmp_filter_ctx *seccomp;
946         Iterator i;
947         void *id;
948         int r;
949
950         assert(c);
951
952         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
953
954         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
955         if (!seccomp)
956                 return -ENOMEM;
957
958         if (c->syscall_archs) {
959
960                 SET_FOREACH(id, c->syscall_archs, i) {
961                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
962                         if (r == -EEXIST)
963                                 continue;
964                         if (r < 0)
965                                 goto finish;
966                 }
967
968         } else {
969                 r = seccomp_add_secondary_archs(seccomp);
970                 if (r < 0)
971                         goto finish;
972         }
973
974         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
975         SET_FOREACH(id, c->syscall_filter, i) {
976                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
977                 if (r < 0)
978                         goto finish;
979         }
980
981         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
982         if (r < 0)
983                 goto finish;
984
985         r = seccomp_load(seccomp);
986
987 finish:
988         seccomp_release(seccomp);
989         return r;
990 }
991
992 static int apply_address_families(const ExecContext *c) {
993         scmp_filter_ctx *seccomp;
994         Iterator i;
995         int r;
996
997         assert(c);
998
999         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1000         if (!seccomp)
1001                 return -ENOMEM;
1002
1003         r = seccomp_add_secondary_archs(seccomp);
1004         if (r < 0)
1005                 goto finish;
1006
1007         if (c->address_families_whitelist) {
1008                 int af, first = 0, last = 0;
1009                 void *afp;
1010
1011                 /* If this is a whitelist, we first block the address
1012                  * families that are out of range and then everything
1013                  * that is not in the set. First, we find the lowest
1014                  * and highest address family in the set. */
1015
1016                 SET_FOREACH(afp, c->address_families, i) {
1017                         af = PTR_TO_INT(afp);
1018
1019                         if (af <= 0 || af >= af_max())
1020                                 continue;
1021
1022                         if (first == 0 || af < first)
1023                                 first = af;
1024
1025                         if (last == 0 || af > last)
1026                                 last = af;
1027                 }
1028
1029                 assert((first == 0) == (last == 0));
1030
1031                 if (first == 0) {
1032
1033                         /* No entries in the valid range, block everything */
1034                         r = seccomp_rule_add(
1035                                         seccomp,
1036                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1037                                         SCMP_SYS(socket),
1038                                         0);
1039                         if (r < 0)
1040                                 goto finish;
1041
1042                 } else {
1043
1044                         /* Block everything below the first entry */
1045                         r = seccomp_rule_add(
1046                                         seccomp,
1047                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1048                                         SCMP_SYS(socket),
1049                                         1,
1050                                         SCMP_A0(SCMP_CMP_LT, first));
1051                         if (r < 0)
1052                                 goto finish;
1053
1054                         /* Block everything above the last entry */
1055                         r = seccomp_rule_add(
1056                                         seccomp,
1057                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1058                                         SCMP_SYS(socket),
1059                                         1,
1060                                         SCMP_A0(SCMP_CMP_GT, last));
1061                         if (r < 0)
1062                                 goto finish;
1063
1064                         /* Block everything between the first and last
1065                          * entry */
1066                         for (af = 1; af < af_max(); af++) {
1067
1068                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1069                                         continue;
1070
1071                                 r = seccomp_rule_add(
1072                                                 seccomp,
1073                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1074                                                 SCMP_SYS(socket),
1075                                                 1,
1076                                                 SCMP_A0(SCMP_CMP_EQ, af));
1077                                 if (r < 0)
1078                                         goto finish;
1079                         }
1080                 }
1081
1082         } else {
1083                 void *af;
1084
1085                 /* If this is a blacklist, then generate one rule for
1086                  * each address family that are then combined in OR
1087                  * checks. */
1088
1089                 SET_FOREACH(af, c->address_families, i) {
1090
1091                         r = seccomp_rule_add(
1092                                         seccomp,
1093                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1094                                         SCMP_SYS(socket),
1095                                         1,
1096                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1097                         if (r < 0)
1098                                 goto finish;
1099                 }
1100         }
1101
1102         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1103         if (r < 0)
1104                 goto finish;
1105
1106         r = seccomp_load(seccomp);
1107
1108 finish:
1109         seccomp_release(seccomp);
1110         return r;
1111 }
1112
1113 #endif
1114
1115 static void do_idle_pipe_dance(int idle_pipe[4]) {
1116         assert(idle_pipe);
1117
1118
1119         safe_close(idle_pipe[1]);
1120         safe_close(idle_pipe[2]);
1121
1122         if (idle_pipe[0] >= 0) {
1123                 int r;
1124
1125                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1126
1127                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1128                         /* Signal systemd that we are bored and want to continue. */
1129                         write(idle_pipe[3], "x", 1);
1130
1131                         /* Wait for systemd to react to the signal above. */
1132                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1133                 }
1134
1135                 safe_close(idle_pipe[0]);
1136
1137         }
1138
1139         safe_close(idle_pipe[3]);
1140 }
1141
1142 static int build_environment(
1143                 const ExecContext *c,
1144                 unsigned n_fds,
1145                 usec_t watchdog_usec,
1146                 const char *home,
1147                 const char *username,
1148                 const char *shell,
1149                 char ***ret) {
1150
1151         _cleanup_strv_free_ char **our_env = NULL;
1152         unsigned n_env = 0;
1153         char *x;
1154
1155         assert(c);
1156         assert(ret);
1157
1158         our_env = new0(char*, 10);
1159         if (!our_env)
1160                 return -ENOMEM;
1161
1162         if (n_fds > 0) {
1163                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1164                         return -ENOMEM;
1165                 our_env[n_env++] = x;
1166
1167                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1168                         return -ENOMEM;
1169                 our_env[n_env++] = x;
1170         }
1171
1172         if (watchdog_usec > 0) {
1173                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1174                         return -ENOMEM;
1175                 our_env[n_env++] = x;
1176
1177                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1178                         return -ENOMEM;
1179                 our_env[n_env++] = x;
1180         }
1181
1182         if (home) {
1183                 x = strappend("HOME=", home);
1184                 if (!x)
1185                         return -ENOMEM;
1186                 our_env[n_env++] = x;
1187         }
1188
1189         if (username) {
1190                 x = strappend("LOGNAME=", username);
1191                 if (!x)
1192                         return -ENOMEM;
1193                 our_env[n_env++] = x;
1194
1195                 x = strappend("USER=", username);
1196                 if (!x)
1197                         return -ENOMEM;
1198                 our_env[n_env++] = x;
1199         }
1200
1201         if (shell) {
1202                 x = strappend("SHELL=", shell);
1203                 if (!x)
1204                         return -ENOMEM;
1205                 our_env[n_env++] = x;
1206         }
1207
1208         if (is_terminal_input(c->std_input) ||
1209             c->std_output == EXEC_OUTPUT_TTY ||
1210             c->std_error == EXEC_OUTPUT_TTY ||
1211             c->tty_path) {
1212
1213                 x = strdup(default_term_for_tty(tty_path(c)));
1214                 if (!x)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217         }
1218
1219         our_env[n_env++] = NULL;
1220         assert(n_env <= 10);
1221
1222         *ret = our_env;
1223         our_env = NULL;
1224
1225         return 0;
1226 }
1227
1228 static int exec_child(ExecCommand *command,
1229                       const ExecContext *context,
1230                       const ExecParameters *params,
1231                       ExecRuntime *runtime,
1232                       char **argv,
1233                       int socket_fd,
1234                       int *fds, unsigned n_fds,
1235                       char **files_env,
1236                       int *error) {
1237
1238         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1239         const char *username = NULL, *home = NULL, *shell = NULL;
1240         unsigned n_dont_close = 0;
1241         int dont_close[n_fds + 4];
1242         uid_t uid = (uid_t) -1;
1243         gid_t gid = (gid_t) -1;
1244         int i, err;
1245
1246         assert(command);
1247         assert(context);
1248         assert(params);
1249         assert(error);
1250
1251         rename_process_from_path(command->path);
1252
1253         /* We reset exactly these signals, since they are the
1254          * only ones we set to SIG_IGN in the main daemon. All
1255          * others we leave untouched because we set them to
1256          * SIG_DFL or a valid handler initially, both of which
1257          * will be demoted to SIG_DFL. */
1258         default_signals(SIGNALS_CRASH_HANDLER,
1259                         SIGNALS_IGNORE, -1);
1260
1261         if (context->ignore_sigpipe)
1262                 ignore_signals(SIGPIPE, -1);
1263
1264         err = reset_signal_mask();
1265         if (err < 0) {
1266                 *error = EXIT_SIGNAL_MASK;
1267                 return err;
1268         }
1269
1270         if (params->idle_pipe)
1271                 do_idle_pipe_dance(params->idle_pipe);
1272
1273         /* Close sockets very early to make sure we don't
1274          * block init reexecution because it cannot bind its
1275          * sockets */
1276         log_forget_fds();
1277
1278         if (socket_fd >= 0)
1279                 dont_close[n_dont_close++] = socket_fd;
1280         if (n_fds > 0) {
1281                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1282                 n_dont_close += n_fds;
1283         }
1284         if (params->bus_endpoint_fd >= 0)
1285                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1286         if (runtime) {
1287                 if (runtime->netns_storage_socket[0] >= 0)
1288                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1289                 if (runtime->netns_storage_socket[1] >= 0)
1290                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1291         }
1292
1293         err = close_all_fds(dont_close, n_dont_close);
1294         if (err < 0) {
1295                 *error = EXIT_FDS;
1296                 return err;
1297         }
1298
1299         if (!context->same_pgrp)
1300                 if (setsid() < 0) {
1301                         *error = EXIT_SETSID;
1302                         return -errno;
1303                 }
1304
1305         exec_context_tty_reset(context);
1306
1307         if (params->confirm_spawn) {
1308                 char response;
1309
1310                 err = ask_for_confirmation(&response, argv);
1311                 if (err == -ETIMEDOUT)
1312                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1313                 else if (err < 0)
1314                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1315                 else if (response == 's') {
1316                         write_confirm_message("Skipping execution.\n");
1317                         *error = EXIT_CONFIRM;
1318                         return -ECANCELED;
1319                 } else if (response == 'n') {
1320                         write_confirm_message("Failing execution.\n");
1321                         *error = 0;
1322                         return 0;
1323                 }
1324         }
1325
1326         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1327          * must sure to drop O_NONBLOCK */
1328         if (socket_fd >= 0)
1329                 fd_nonblock(socket_fd, false);
1330
1331         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1332         if (err < 0) {
1333                 *error = EXIT_STDIN;
1334                 return err;
1335         }
1336
1337         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1338         if (err < 0) {
1339                 *error = EXIT_STDOUT;
1340                 return err;
1341         }
1342
1343         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1344         if (err < 0) {
1345                 *error = EXIT_STDERR;
1346                 return err;
1347         }
1348
1349         if (params->cgroup_path) {
1350                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1351                 if (err < 0) {
1352                         *error = EXIT_CGROUP;
1353                         return err;
1354                 }
1355         }
1356
1357         if (context->oom_score_adjust_set) {
1358                 char t[16];
1359
1360                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1361                 char_array_0(t);
1362
1363                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1364                         *error = EXIT_OOM_ADJUST;
1365                         return -errno;
1366                 }
1367         }
1368
1369         if (context->nice_set)
1370                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1371                         *error = EXIT_NICE;
1372                         return -errno;
1373                 }
1374
1375         if (context->cpu_sched_set) {
1376                 struct sched_param param = {
1377                         .sched_priority = context->cpu_sched_priority,
1378                 };
1379
1380                 err = sched_setscheduler(0,
1381                                          context->cpu_sched_policy |
1382                                          (context->cpu_sched_reset_on_fork ?
1383                                           SCHED_RESET_ON_FORK : 0),
1384                                          &param);
1385                 if (err < 0) {
1386                         *error = EXIT_SETSCHEDULER;
1387                         return -errno;
1388                 }
1389         }
1390
1391         if (context->cpuset)
1392                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1393                         *error = EXIT_CPUAFFINITY;
1394                         return -errno;
1395                 }
1396
1397         if (context->ioprio_set)
1398                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1399                         *error = EXIT_IOPRIO;
1400                         return -errno;
1401                 }
1402
1403         if (context->timer_slack_nsec != NSEC_INFINITY)
1404                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1405                         *error = EXIT_TIMERSLACK;
1406                         return -errno;
1407                 }
1408
1409         if (context->personality != 0xffffffffUL)
1410                 if (personality(context->personality) < 0) {
1411                         *error = EXIT_PERSONALITY;
1412                         return -errno;
1413                 }
1414
1415         if (context->utmp_id)
1416                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1417
1418         if (context->user) {
1419                 username = context->user;
1420                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1421                 if (err < 0) {
1422                         *error = EXIT_USER;
1423                         return err;
1424                 }
1425
1426                 if (is_terminal_input(context->std_input)) {
1427                         err = chown_terminal(STDIN_FILENO, uid);
1428                         if (err < 0) {
1429                                 *error = EXIT_STDIN;
1430                                 return err;
1431                         }
1432                 }
1433         }
1434
1435 #ifdef ENABLE_KDBUS
1436         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1437                 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1438
1439                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1440                 if (err < 0) {
1441                         *error = EXIT_BUS_ENDPOINT;
1442                         return err;
1443                 }
1444         }
1445 #endif
1446
1447         /* If delegation is enabled we'll pass ownership of the cgroup
1448          * (but only in systemd's own controller hierarchy!) to the
1449          * user of the new process. */
1450         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1451                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1452                 if (err < 0) {
1453                         *error = EXIT_CGROUP;
1454                         return err;
1455                 }
1456
1457
1458                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1459                 if (err < 0) {
1460                         *error = EXIT_CGROUP;
1461                         return err;
1462                 }
1463         }
1464
1465         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1466                 char **rt;
1467
1468                 STRV_FOREACH(rt, context->runtime_directory) {
1469                         _cleanup_free_ char *p;
1470
1471                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1472                         if (!p) {
1473                                 *error = EXIT_RUNTIME_DIRECTORY;
1474                                 return -ENOMEM;
1475                         }
1476
1477                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1478                         if (err < 0) {
1479                                 *error = EXIT_RUNTIME_DIRECTORY;
1480                                 return err;
1481                         }
1482                 }
1483         }
1484
1485         if (params->apply_permissions) {
1486                 err = enforce_groups(context, username, gid);
1487                 if (err < 0) {
1488                         *error = EXIT_GROUP;
1489                         return err;
1490                 }
1491         }
1492
1493         umask(context->umask);
1494
1495 #ifdef HAVE_PAM
1496         if (params->apply_permissions && context->pam_name && username) {
1497                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1498                 if (err < 0) {
1499                         *error = EXIT_PAM;
1500                         return err;
1501                 }
1502         }
1503 #endif
1504
1505         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1506                 err = setup_netns(runtime->netns_storage_socket);
1507                 if (err < 0) {
1508                         *error = EXIT_NETWORK;
1509                         return err;
1510                 }
1511         }
1512
1513         if (!strv_isempty(context->read_write_dirs) ||
1514             !strv_isempty(context->read_only_dirs) ||
1515             !strv_isempty(context->inaccessible_dirs) ||
1516             context->mount_flags != 0 ||
1517             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1518             params->bus_endpoint_path ||
1519             context->private_devices ||
1520             context->protect_system != PROTECT_SYSTEM_NO ||
1521             context->protect_home != PROTECT_HOME_NO) {
1522
1523                 char *tmp = NULL, *var = NULL;
1524
1525                 /* The runtime struct only contains the parent
1526                  * of the private /tmp, which is
1527                  * non-accessible to world users. Inside of it
1528                  * there's a /tmp that is sticky, and that's
1529                  * the one we want to use here. */
1530
1531                 if (context->private_tmp && runtime) {
1532                         if (runtime->tmp_dir)
1533                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1534                         if (runtime->var_tmp_dir)
1535                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1536                 }
1537
1538                 err = setup_namespace(
1539                                 context->read_write_dirs,
1540                                 context->read_only_dirs,
1541                                 context->inaccessible_dirs,
1542                                 tmp,
1543                                 var,
1544                                 params->bus_endpoint_path,
1545                                 context->private_devices,
1546                                 context->protect_home,
1547                                 context->protect_system,
1548                                 context->mount_flags);
1549
1550                 if (err == -EPERM)
1551                         log_warning_unit(params->unit_id, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %s", strerror(-err));
1552                 else if (err < 0) {
1553                         *error = EXIT_NAMESPACE;
1554                         return err;
1555                 }
1556         }
1557
1558         if (params->apply_chroot) {
1559                 if (context->root_directory)
1560                         if (chroot(context->root_directory) < 0) {
1561                                 *error = EXIT_CHROOT;
1562                                 return -errno;
1563                         }
1564
1565                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1566                         *error = EXIT_CHDIR;
1567                         return -errno;
1568                 }
1569         } else {
1570                 _cleanup_free_ char *d = NULL;
1571
1572                 if (asprintf(&d, "%s/%s",
1573                              context->root_directory ? context->root_directory : "",
1574                              context->working_directory ? context->working_directory : "") < 0) {
1575                         *error = EXIT_MEMORY;
1576                         return -ENOMEM;
1577                 }
1578
1579                 if (chdir(d) < 0) {
1580                         *error = EXIT_CHDIR;
1581                         return -errno;
1582                 }
1583         }
1584
1585         /* We repeat the fd closing here, to make sure that
1586          * nothing is leaked from the PAM modules. Note that
1587          * we are more aggressive this time since socket_fd
1588          * and the netns fds we don't need anymore. The custom
1589          * endpoint fd was needed to upload the policy and can
1590          * now be closed as well. */
1591         err = close_all_fds(fds, n_fds);
1592         if (err >= 0)
1593                 err = shift_fds(fds, n_fds);
1594         if (err >= 0)
1595                 err = flags_fds(fds, n_fds, context->non_blocking);
1596         if (err < 0) {
1597                 *error = EXIT_FDS;
1598                 return err;
1599         }
1600
1601         if (params->apply_permissions) {
1602
1603                 for (i = 0; i < _RLIMIT_MAX; i++) {
1604                         if (!context->rlimit[i])
1605                                 continue;
1606
1607                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1608                                 *error = EXIT_LIMITS;
1609                                 return -errno;
1610                         }
1611                 }
1612
1613                 if (context->capability_bounding_set_drop) {
1614                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1615                         if (err < 0) {
1616                                 *error = EXIT_CAPABILITIES;
1617                                 return err;
1618                         }
1619                 }
1620
1621                 if (context->user) {
1622                         err = enforce_user(context, uid);
1623                         if (err < 0) {
1624                                 *error = EXIT_USER;
1625                                 return err;
1626                         }
1627                 }
1628
1629                 /* PR_GET_SECUREBITS is not privileged, while
1630                  * PR_SET_SECUREBITS is. So to suppress
1631                  * potential EPERMs we'll try not to call
1632                  * PR_SET_SECUREBITS unless necessary. */
1633                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1634                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1635                                 *error = EXIT_SECUREBITS;
1636                                 return -errno;
1637                         }
1638
1639                 if (context->capabilities)
1640                         if (cap_set_proc(context->capabilities) < 0) {
1641                                 *error = EXIT_CAPABILITIES;
1642                                 return -errno;
1643                         }
1644
1645                 if (context->no_new_privileges)
1646                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1647                                 *error = EXIT_NO_NEW_PRIVILEGES;
1648                                 return -errno;
1649                         }
1650
1651 #ifdef HAVE_SECCOMP
1652                 if (context->address_families_whitelist ||
1653                     !set_isempty(context->address_families)) {
1654                         err = apply_address_families(context);
1655                         if (err < 0) {
1656                                 *error = EXIT_ADDRESS_FAMILIES;
1657                                 return err;
1658                         }
1659                 }
1660
1661                 if (context->syscall_whitelist ||
1662                     !set_isempty(context->syscall_filter) ||
1663                     !set_isempty(context->syscall_archs)) {
1664                         err = apply_seccomp(context);
1665                         if (err < 0) {
1666                                 *error = EXIT_SECCOMP;
1667                                 return err;
1668                         }
1669                 }
1670 #endif
1671
1672 #ifdef HAVE_SELINUX
1673                 if (mac_selinux_use()) {
1674                         if (context->selinux_context) {
1675                                 err = setexeccon(context->selinux_context);
1676                                 if (err < 0 && !context->selinux_context_ignore) {
1677                                         *error = EXIT_SELINUX_CONTEXT;
1678                                         return err;
1679                                 }
1680                         }
1681
1682                         if (params->selinux_context_net && socket_fd >= 0) {
1683                                 _cleanup_free_ char *label = NULL;
1684
1685                                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1686                                 if (err < 0) {
1687                                         *error = EXIT_SELINUX_CONTEXT;
1688                                         return err;
1689                                 }
1690
1691                                 err = setexeccon(label);
1692                                 if (err < 0) {
1693                                         *error = EXIT_SELINUX_CONTEXT;
1694                                         return err;
1695                                 }
1696                         }
1697                 }
1698 #endif
1699
1700 #ifdef HAVE_APPARMOR
1701                 if (context->apparmor_profile && mac_apparmor_use()) {
1702                         err = aa_change_onexec(context->apparmor_profile);
1703                         if (err < 0 && !context->apparmor_profile_ignore) {
1704                                 *error = EXIT_APPARMOR_PROFILE;
1705                                 return -errno;
1706                         }
1707                 }
1708 #endif
1709         }
1710
1711         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1712         if (err < 0) {
1713                 *error = EXIT_MEMORY;
1714                 return err;
1715         }
1716
1717         final_env = strv_env_merge(5,
1718                                    params->environment,
1719                                    our_env,
1720                                    context->environment,
1721                                    files_env,
1722                                    pam_env,
1723                                    NULL);
1724         if (!final_env) {
1725                 *error = EXIT_MEMORY;
1726                 return -ENOMEM;
1727         }
1728
1729         final_argv = replace_env_argv(argv, final_env);
1730         if (!final_argv) {
1731                 *error = EXIT_MEMORY;
1732                 return -ENOMEM;
1733         }
1734
1735         final_env = strv_env_clean(final_env);
1736
1737         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1738                 _cleanup_free_ char *line;
1739
1740                 line = exec_command_line(final_argv);
1741                 if (line) {
1742                         log_open();
1743                         log_struct_unit(LOG_DEBUG,
1744                                         params->unit_id,
1745                                         "EXECUTABLE=%s", command->path,
1746                                         "MESSAGE=Executing: %s", line,
1747                                         NULL);
1748                         log_close();
1749                 }
1750         }
1751         execve(command->path, final_argv, final_env);
1752         *error = EXIT_EXEC;
1753         return -errno;
1754 }
1755
1756 int exec_spawn(ExecCommand *command,
1757                const ExecContext *context,
1758                const ExecParameters *params,
1759                ExecRuntime *runtime,
1760                pid_t *ret) {
1761
1762         _cleanup_strv_free_ char **files_env = NULL;
1763         int *fds = NULL; unsigned n_fds = 0;
1764         char *line, **argv;
1765         int socket_fd;
1766         pid_t pid;
1767         int err;
1768
1769         assert(command);
1770         assert(context);
1771         assert(ret);
1772         assert(params);
1773         assert(params->fds || params->n_fds <= 0);
1774
1775         if (context->std_input == EXEC_INPUT_SOCKET ||
1776             context->std_output == EXEC_OUTPUT_SOCKET ||
1777             context->std_error == EXEC_OUTPUT_SOCKET) {
1778
1779                 if (params->n_fds != 1)
1780                         return -EINVAL;
1781
1782                 socket_fd = params->fds[0];
1783         } else {
1784                 socket_fd = -1;
1785                 fds = params->fds;
1786                 n_fds = params->n_fds;
1787         }
1788
1789         err = exec_context_load_environment(context, params->unit_id, &files_env);
1790         if (err < 0) {
1791                 log_struct_unit(LOG_ERR,
1792                            params->unit_id,
1793                            "MESSAGE=Failed to load environment files: %s", strerror(-err),
1794                            "ERRNO=%d", -err,
1795                            NULL);
1796                 return err;
1797         }
1798
1799         argv = params->argv ?: command->argv;
1800
1801         line = exec_command_line(argv);
1802         if (!line)
1803                 return log_oom();
1804
1805         log_struct_unit(LOG_DEBUG,
1806                         params->unit_id,
1807                         "EXECUTABLE=%s", command->path,
1808                         "MESSAGE=About to execute: %s", line,
1809                         NULL);
1810         free(line);
1811
1812         pid = fork();
1813         if (pid < 0)
1814                 return -errno;
1815
1816         if (pid == 0) {
1817                 int r;
1818
1819                 err = exec_child(command,
1820                                  context,
1821                                  params,
1822                                  runtime,
1823                                  argv,
1824                                  socket_fd,
1825                                  fds, n_fds,
1826                                  files_env,
1827                                  &r);
1828                 if (r != 0) {
1829                         log_open();
1830                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1831                                    "EXECUTABLE=%s", command->path,
1832                                    "MESSAGE=Failed at step %s spawning %s: %s",
1833                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1834                                           command->path, strerror(-err),
1835                                    "ERRNO=%d", -err,
1836                                    NULL);
1837                         log_close();
1838                 }
1839
1840                 _exit(r);
1841         }
1842
1843         log_struct_unit(LOG_DEBUG,
1844                         params->unit_id,
1845                         "MESSAGE=Forked %s as "PID_FMT,
1846                         command->path, pid,
1847                         NULL);
1848
1849         /* We add the new process to the cgroup both in the child (so
1850          * that we can be sure that no user code is ever executed
1851          * outside of the cgroup) and in the parent (so that we can be
1852          * sure that when we kill the cgroup the process will be
1853          * killed too). */
1854         if (params->cgroup_path)
1855                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1856
1857         exec_status_start(&command->exec_status, pid);
1858
1859         *ret = pid;
1860         return 0;
1861 }
1862
1863 void exec_context_init(ExecContext *c) {
1864         assert(c);
1865
1866         c->umask = 0022;
1867         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1868         c->cpu_sched_policy = SCHED_OTHER;
1869         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1870         c->syslog_level_prefix = true;
1871         c->ignore_sigpipe = true;
1872         c->timer_slack_nsec = NSEC_INFINITY;
1873         c->personality = 0xffffffffUL;
1874         c->runtime_directory_mode = 0755;
1875 }
1876
1877 void exec_context_done(ExecContext *c) {
1878         unsigned l;
1879
1880         assert(c);
1881
1882         strv_free(c->environment);
1883         c->environment = NULL;
1884
1885         strv_free(c->environment_files);
1886         c->environment_files = NULL;
1887
1888         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1889                 free(c->rlimit[l]);
1890                 c->rlimit[l] = NULL;
1891         }
1892
1893         free(c->working_directory);
1894         c->working_directory = NULL;
1895         free(c->root_directory);
1896         c->root_directory = NULL;
1897
1898         free(c->tty_path);
1899         c->tty_path = NULL;
1900
1901         free(c->syslog_identifier);
1902         c->syslog_identifier = NULL;
1903
1904         free(c->user);
1905         c->user = NULL;
1906
1907         free(c->group);
1908         c->group = NULL;
1909
1910         strv_free(c->supplementary_groups);
1911         c->supplementary_groups = NULL;
1912
1913         free(c->pam_name);
1914         c->pam_name = NULL;
1915
1916         if (c->capabilities) {
1917                 cap_free(c->capabilities);
1918                 c->capabilities = NULL;
1919         }
1920
1921         strv_free(c->read_only_dirs);
1922         c->read_only_dirs = NULL;
1923
1924         strv_free(c->read_write_dirs);
1925         c->read_write_dirs = NULL;
1926
1927         strv_free(c->inaccessible_dirs);
1928         c->inaccessible_dirs = NULL;
1929
1930         if (c->cpuset)
1931                 CPU_FREE(c->cpuset);
1932
1933         free(c->utmp_id);
1934         c->utmp_id = NULL;
1935
1936         free(c->selinux_context);
1937         c->selinux_context = NULL;
1938
1939         free(c->apparmor_profile);
1940         c->apparmor_profile = NULL;
1941
1942         set_free(c->syscall_filter);
1943         c->syscall_filter = NULL;
1944
1945         set_free(c->syscall_archs);
1946         c->syscall_archs = NULL;
1947
1948         set_free(c->address_families);
1949         c->address_families = NULL;
1950
1951         strv_free(c->runtime_directory);
1952         c->runtime_directory = NULL;
1953
1954         bus_endpoint_free(c->bus_endpoint);
1955         c->bus_endpoint = NULL;
1956 }
1957
1958 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1959         char **i;
1960
1961         assert(c);
1962
1963         if (!runtime_prefix)
1964                 return 0;
1965
1966         STRV_FOREACH(i, c->runtime_directory) {
1967                 _cleanup_free_ char *p;
1968
1969                 p = strjoin(runtime_prefix, "/", *i, NULL);
1970                 if (!p)
1971                         return -ENOMEM;
1972
1973                 /* We execute this synchronously, since we need to be
1974                  * sure this is gone when we start the service
1975                  * next. */
1976                 rm_rf_dangerous(p, false, true, false);
1977         }
1978
1979         return 0;
1980 }
1981
1982 void exec_command_done(ExecCommand *c) {
1983         assert(c);
1984
1985         free(c->path);
1986         c->path = NULL;
1987
1988         strv_free(c->argv);
1989         c->argv = NULL;
1990 }
1991
1992 void exec_command_done_array(ExecCommand *c, unsigned n) {
1993         unsigned i;
1994
1995         for (i = 0; i < n; i++)
1996                 exec_command_done(c+i);
1997 }
1998
1999 void exec_command_free_list(ExecCommand *c) {
2000         ExecCommand *i;
2001
2002         while ((i = c)) {
2003                 LIST_REMOVE(command, c, i);
2004                 exec_command_done(i);
2005                 free(i);
2006         }
2007 }
2008
2009 void exec_command_free_array(ExecCommand **c, unsigned n) {
2010         unsigned i;
2011
2012         for (i = 0; i < n; i++) {
2013                 exec_command_free_list(c[i]);
2014                 c[i] = NULL;
2015         }
2016 }
2017
2018 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2019         char **i, **r = NULL;
2020
2021         assert(c);
2022         assert(l);
2023
2024         STRV_FOREACH(i, c->environment_files) {
2025                 char *fn;
2026                 int k;
2027                 bool ignore = false;
2028                 char **p;
2029                 _cleanup_globfree_ glob_t pglob = {};
2030                 int count, n;
2031
2032                 fn = *i;
2033
2034                 if (fn[0] == '-') {
2035                         ignore = true;
2036                         fn ++;
2037                 }
2038
2039                 if (!path_is_absolute(fn)) {
2040                         if (ignore)
2041                                 continue;
2042
2043                         strv_free(r);
2044                         return -EINVAL;
2045                 }
2046
2047                 /* Filename supports globbing, take all matching files */
2048                 errno = 0;
2049                 if (glob(fn, 0, NULL, &pglob) != 0) {
2050                         if (ignore)
2051                                 continue;
2052
2053                         strv_free(r);
2054                         return errno ? -errno : -EINVAL;
2055                 }
2056                 count = pglob.gl_pathc;
2057                 if (count == 0) {
2058                         if (ignore)
2059                                 continue;
2060
2061                         strv_free(r);
2062                         return -EINVAL;
2063                 }
2064                 for (n = 0; n < count; n++) {
2065                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2066                         if (k < 0) {
2067                                 if (ignore)
2068                                         continue;
2069
2070                                 strv_free(r);
2071                                 return k;
2072                         }
2073                         /* Log invalid environment variables with filename */
2074                         if (p)
2075                                 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2076
2077                         if (r == NULL)
2078                                 r = p;
2079                         else {
2080                                 char **m;
2081
2082                                 m = strv_env_merge(2, r, p);
2083                                 strv_free(r);
2084                                 strv_free(p);
2085                                 if (!m)
2086                                         return -ENOMEM;
2087
2088                                 r = m;
2089                         }
2090                 }
2091         }
2092
2093         *l = r;
2094
2095         return 0;
2096 }
2097
2098 static bool tty_may_match_dev_console(const char *tty) {
2099         _cleanup_free_ char *active = NULL;
2100        char *console;
2101
2102         if (startswith(tty, "/dev/"))
2103                 tty += 5;
2104
2105         /* trivial identity? */
2106         if (streq(tty, "console"))
2107                 return true;
2108
2109         console = resolve_dev_console(&active);
2110         /* if we could not resolve, assume it may */
2111         if (!console)
2112                 return true;
2113
2114         /* "tty0" means the active VC, so it may be the same sometimes */
2115         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2116 }
2117
2118 bool exec_context_may_touch_console(ExecContext *ec) {
2119         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2120                 is_terminal_input(ec->std_input) ||
2121                 is_terminal_output(ec->std_output) ||
2122                 is_terminal_output(ec->std_error)) &&
2123                tty_may_match_dev_console(tty_path(ec));
2124 }
2125
2126 static void strv_fprintf(FILE *f, char **l) {
2127         char **g;
2128
2129         assert(f);
2130
2131         STRV_FOREACH(g, l)
2132                 fprintf(f, " %s", *g);
2133 }
2134
2135 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2136         char **e;
2137         unsigned i;
2138
2139         assert(c);
2140         assert(f);
2141
2142         prefix = strempty(prefix);
2143
2144         fprintf(f,
2145                 "%sUMask: %04o\n"
2146                 "%sWorkingDirectory: %s\n"
2147                 "%sRootDirectory: %s\n"
2148                 "%sNonBlocking: %s\n"
2149                 "%sPrivateTmp: %s\n"
2150                 "%sPrivateNetwork: %s\n"
2151                 "%sPrivateDevices: %s\n"
2152                 "%sProtectHome: %s\n"
2153                 "%sProtectSystem: %s\n"
2154                 "%sIgnoreSIGPIPE: %s\n",
2155                 prefix, c->umask,
2156                 prefix, c->working_directory ? c->working_directory : "/",
2157                 prefix, c->root_directory ? c->root_directory : "/",
2158                 prefix, yes_no(c->non_blocking),
2159                 prefix, yes_no(c->private_tmp),
2160                 prefix, yes_no(c->private_network),
2161                 prefix, yes_no(c->private_devices),
2162                 prefix, protect_home_to_string(c->protect_home),
2163                 prefix, protect_system_to_string(c->protect_system),
2164                 prefix, yes_no(c->ignore_sigpipe));
2165
2166         STRV_FOREACH(e, c->environment)
2167                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2168
2169         STRV_FOREACH(e, c->environment_files)
2170                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2171
2172         if (c->nice_set)
2173                 fprintf(f,
2174                         "%sNice: %i\n",
2175                         prefix, c->nice);
2176
2177         if (c->oom_score_adjust_set)
2178                 fprintf(f,
2179                         "%sOOMScoreAdjust: %i\n",
2180                         prefix, c->oom_score_adjust);
2181
2182         for (i = 0; i < RLIM_NLIMITS; i++)
2183                 if (c->rlimit[i])
2184                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2185                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2186
2187         if (c->ioprio_set) {
2188                 _cleanup_free_ char *class_str = NULL;
2189
2190                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2191                 fprintf(f,
2192                         "%sIOSchedulingClass: %s\n"
2193                         "%sIOPriority: %i\n",
2194                         prefix, strna(class_str),
2195                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2196         }
2197
2198         if (c->cpu_sched_set) {
2199                 _cleanup_free_ char *policy_str = NULL;
2200
2201                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2202                 fprintf(f,
2203                         "%sCPUSchedulingPolicy: %s\n"
2204                         "%sCPUSchedulingPriority: %i\n"
2205                         "%sCPUSchedulingResetOnFork: %s\n",
2206                         prefix, strna(policy_str),
2207                         prefix, c->cpu_sched_priority,
2208                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2209         }
2210
2211         if (c->cpuset) {
2212                 fprintf(f, "%sCPUAffinity:", prefix);
2213                 for (i = 0; i < c->cpuset_ncpus; i++)
2214                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2215                                 fprintf(f, " %u", i);
2216                 fputs("\n", f);
2217         }
2218
2219         if (c->timer_slack_nsec != NSEC_INFINITY)
2220                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2221
2222         fprintf(f,
2223                 "%sStandardInput: %s\n"
2224                 "%sStandardOutput: %s\n"
2225                 "%sStandardError: %s\n",
2226                 prefix, exec_input_to_string(c->std_input),
2227                 prefix, exec_output_to_string(c->std_output),
2228                 prefix, exec_output_to_string(c->std_error));
2229
2230         if (c->tty_path)
2231                 fprintf(f,
2232                         "%sTTYPath: %s\n"
2233                         "%sTTYReset: %s\n"
2234                         "%sTTYVHangup: %s\n"
2235                         "%sTTYVTDisallocate: %s\n",
2236                         prefix, c->tty_path,
2237                         prefix, yes_no(c->tty_reset),
2238                         prefix, yes_no(c->tty_vhangup),
2239                         prefix, yes_no(c->tty_vt_disallocate));
2240
2241         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2242             c->std_output == EXEC_OUTPUT_KMSG ||
2243             c->std_output == EXEC_OUTPUT_JOURNAL ||
2244             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2245             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2246             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2247             c->std_error == EXEC_OUTPUT_SYSLOG ||
2248             c->std_error == EXEC_OUTPUT_KMSG ||
2249             c->std_error == EXEC_OUTPUT_JOURNAL ||
2250             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2251             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2252             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2253
2254                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2255
2256                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2257                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2258
2259                 fprintf(f,
2260                         "%sSyslogFacility: %s\n"
2261                         "%sSyslogLevel: %s\n",
2262                         prefix, strna(fac_str),
2263                         prefix, strna(lvl_str));
2264         }
2265
2266         if (c->capabilities) {
2267                 _cleanup_cap_free_charp_ char *t;
2268
2269                 t = cap_to_text(c->capabilities, NULL);
2270                 if (t)
2271                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2272         }
2273
2274         if (c->secure_bits)
2275                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2276                         prefix,
2277                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2278                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2279                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2280                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2281                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2282                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2283
2284         if (c->capability_bounding_set_drop) {
2285                 unsigned long l;
2286                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2287
2288                 for (l = 0; l <= cap_last_cap(); l++)
2289                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2290                                 _cleanup_cap_free_charp_ char *t;
2291
2292                                 t = cap_to_name(l);
2293                                 if (t)
2294                                         fprintf(f, " %s", t);
2295                         }
2296
2297                 fputs("\n", f);
2298         }
2299
2300         if (c->user)
2301                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2302         if (c->group)
2303                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2304
2305         if (strv_length(c->supplementary_groups) > 0) {
2306                 fprintf(f, "%sSupplementaryGroups:", prefix);
2307                 strv_fprintf(f, c->supplementary_groups);
2308                 fputs("\n", f);
2309         }
2310
2311         if (c->pam_name)
2312                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2313
2314         if (strv_length(c->read_write_dirs) > 0) {
2315                 fprintf(f, "%sReadWriteDirs:", prefix);
2316                 strv_fprintf(f, c->read_write_dirs);
2317                 fputs("\n", f);
2318         }
2319
2320         if (strv_length(c->read_only_dirs) > 0) {
2321                 fprintf(f, "%sReadOnlyDirs:", prefix);
2322                 strv_fprintf(f, c->read_only_dirs);
2323                 fputs("\n", f);
2324         }
2325
2326         if (strv_length(c->inaccessible_dirs) > 0) {
2327                 fprintf(f, "%sInaccessibleDirs:", prefix);
2328                 strv_fprintf(f, c->inaccessible_dirs);
2329                 fputs("\n", f);
2330         }
2331
2332         if (c->utmp_id)
2333                 fprintf(f,
2334                         "%sUtmpIdentifier: %s\n",
2335                         prefix, c->utmp_id);
2336
2337         if (c->selinux_context)
2338                 fprintf(f,
2339                         "%sSELinuxContext: %s%s\n",
2340                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2341
2342         if (c->personality != 0xffffffffUL)
2343                 fprintf(f,
2344                         "%sPersonality: %s\n",
2345                         prefix, strna(personality_to_string(c->personality)));
2346
2347         if (c->syscall_filter) {
2348 #ifdef HAVE_SECCOMP
2349                 Iterator j;
2350                 void *id;
2351                 bool first = true;
2352 #endif
2353
2354                 fprintf(f,
2355                         "%sSystemCallFilter: ",
2356                         prefix);
2357
2358                 if (!c->syscall_whitelist)
2359                         fputc('~', f);
2360
2361 #ifdef HAVE_SECCOMP
2362                 SET_FOREACH(id, c->syscall_filter, j) {
2363                         _cleanup_free_ char *name = NULL;
2364
2365                         if (first)
2366                                 first = false;
2367                         else
2368                                 fputc(' ', f);
2369
2370                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2371                         fputs(strna(name), f);
2372                 }
2373 #endif
2374
2375                 fputc('\n', f);
2376         }
2377
2378         if (c->syscall_archs) {
2379 #ifdef HAVE_SECCOMP
2380                 Iterator j;
2381                 void *id;
2382 #endif
2383
2384                 fprintf(f,
2385                         "%sSystemCallArchitectures:",
2386                         prefix);
2387
2388 #ifdef HAVE_SECCOMP
2389                 SET_FOREACH(id, c->syscall_archs, j)
2390                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2391 #endif
2392                 fputc('\n', f);
2393         }
2394
2395         if (c->syscall_errno != 0)
2396                 fprintf(f,
2397                         "%sSystemCallErrorNumber: %s\n",
2398                         prefix, strna(errno_to_name(c->syscall_errno)));
2399
2400         if (c->apparmor_profile)
2401                 fprintf(f,
2402                         "%sAppArmorProfile: %s%s\n",
2403                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2404 }
2405
2406 bool exec_context_maintains_privileges(ExecContext *c) {
2407         assert(c);
2408
2409         /* Returns true if the process forked off would run run under
2410          * an unchanged UID or as root. */
2411
2412         if (!c->user)
2413                 return true;
2414
2415         if (streq(c->user, "root") || streq(c->user, "0"))
2416                 return true;
2417
2418         return false;
2419 }
2420
2421 void exec_status_start(ExecStatus *s, pid_t pid) {
2422         assert(s);
2423
2424         zero(*s);
2425         s->pid = pid;
2426         dual_timestamp_get(&s->start_timestamp);
2427 }
2428
2429 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2430         assert(s);
2431
2432         if (s->pid && s->pid != pid)
2433                 zero(*s);
2434
2435         s->pid = pid;
2436         dual_timestamp_get(&s->exit_timestamp);
2437
2438         s->code = code;
2439         s->status = status;
2440
2441         if (context) {
2442                 if (context->utmp_id)
2443                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2444
2445                 exec_context_tty_reset(context);
2446         }
2447 }
2448
2449 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2450         char buf[FORMAT_TIMESTAMP_MAX];
2451
2452         assert(s);
2453         assert(f);
2454
2455         if (s->pid <= 0)
2456                 return;
2457
2458         prefix = strempty(prefix);
2459
2460         fprintf(f,
2461                 "%sPID: "PID_FMT"\n",
2462                 prefix, s->pid);
2463
2464         if (s->start_timestamp.realtime > 0)
2465                 fprintf(f,
2466                         "%sStart Timestamp: %s\n",
2467                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2468
2469         if (s->exit_timestamp.realtime > 0)
2470                 fprintf(f,
2471                         "%sExit Timestamp: %s\n"
2472                         "%sExit Code: %s\n"
2473                         "%sExit Status: %i\n",
2474                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2475                         prefix, sigchld_code_to_string(s->code),
2476                         prefix, s->status);
2477 }
2478
2479 char *exec_command_line(char **argv) {
2480         size_t k;
2481         char *n, *p, **a;
2482         bool first = true;
2483
2484         assert(argv);
2485
2486         k = 1;
2487         STRV_FOREACH(a, argv)
2488                 k += strlen(*a)+3;
2489
2490         if (!(n = new(char, k)))
2491                 return NULL;
2492
2493         p = n;
2494         STRV_FOREACH(a, argv) {
2495
2496                 if (!first)
2497                         *(p++) = ' ';
2498                 else
2499                         first = false;
2500
2501                 if (strpbrk(*a, WHITESPACE)) {
2502                         *(p++) = '\'';
2503                         p = stpcpy(p, *a);
2504                         *(p++) = '\'';
2505                 } else
2506                         p = stpcpy(p, *a);
2507
2508         }
2509
2510         *p = 0;
2511
2512         /* FIXME: this doesn't really handle arguments that have
2513          * spaces and ticks in them */
2514
2515         return n;
2516 }
2517
2518 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2519         _cleanup_free_ char *cmd = NULL;
2520         const char *prefix2;
2521
2522         assert(c);
2523         assert(f);
2524
2525         prefix = strempty(prefix);
2526         prefix2 = strappenda(prefix, "\t");
2527
2528         cmd = exec_command_line(c->argv);
2529         fprintf(f,
2530                 "%sCommand Line: %s\n",
2531                 prefix, cmd ? cmd : strerror(ENOMEM));
2532
2533         exec_status_dump(&c->exec_status, f, prefix2);
2534 }
2535
2536 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2537         assert(f);
2538
2539         prefix = strempty(prefix);
2540
2541         LIST_FOREACH(command, c, c)
2542                 exec_command_dump(c, f, prefix);
2543 }
2544
2545 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2546         ExecCommand *end;
2547
2548         assert(l);
2549         assert(e);
2550
2551         if (*l) {
2552                 /* It's kind of important, that we keep the order here */
2553                 LIST_FIND_TAIL(command, *l, end);
2554                 LIST_INSERT_AFTER(command, *l, end, e);
2555         } else
2556               *l = e;
2557 }
2558
2559 int exec_command_set(ExecCommand *c, const char *path, ...) {
2560         va_list ap;
2561         char **l, *p;
2562
2563         assert(c);
2564         assert(path);
2565
2566         va_start(ap, path);
2567         l = strv_new_ap(path, ap);
2568         va_end(ap);
2569
2570         if (!l)
2571                 return -ENOMEM;
2572
2573         p = strdup(path);
2574         if (!p) {
2575                 strv_free(l);
2576                 return -ENOMEM;
2577         }
2578
2579         free(c->path);
2580         c->path = p;
2581
2582         strv_free(c->argv);
2583         c->argv = l;
2584
2585         return 0;
2586 }
2587
2588 int exec_command_append(ExecCommand *c, const char *path, ...) {
2589         _cleanup_strv_free_ char **l = NULL;
2590         va_list ap;
2591         int r;
2592
2593         assert(c);
2594         assert(path);
2595
2596         va_start(ap, path);
2597         l = strv_new_ap(path, ap);
2598         va_end(ap);
2599
2600         if (!l)
2601                 return -ENOMEM;
2602
2603         r = strv_extend_strv(&c->argv, l);
2604         if (r < 0)
2605                 return r;
2606
2607         return 0;
2608 }
2609
2610
2611 static int exec_runtime_allocate(ExecRuntime **rt) {
2612
2613         if (*rt)
2614                 return 0;
2615
2616         *rt = new0(ExecRuntime, 1);
2617         if (!*rt)
2618                 return -ENOMEM;
2619
2620         (*rt)->n_ref = 1;
2621         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2622
2623         return 0;
2624 }
2625
2626 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2627         int r;
2628
2629         assert(rt);
2630         assert(c);
2631         assert(id);
2632
2633         if (*rt)
2634                 return 1;
2635
2636         if (!c->private_network && !c->private_tmp)
2637                 return 0;
2638
2639         r = exec_runtime_allocate(rt);
2640         if (r < 0)
2641                 return r;
2642
2643         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2644                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2645                         return -errno;
2646         }
2647
2648         if (c->private_tmp && !(*rt)->tmp_dir) {
2649                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2650                 if (r < 0)
2651                         return r;
2652         }
2653
2654         return 1;
2655 }
2656
2657 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2658         assert(r);
2659         assert(r->n_ref > 0);
2660
2661         r->n_ref++;
2662         return r;
2663 }
2664
2665 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2666
2667         if (!r)
2668                 return NULL;
2669
2670         assert(r->n_ref > 0);
2671
2672         r->n_ref--;
2673         if (r->n_ref <= 0) {
2674                 free(r->tmp_dir);
2675                 free(r->var_tmp_dir);
2676                 safe_close_pair(r->netns_storage_socket);
2677                 free(r);
2678         }
2679
2680         return NULL;
2681 }
2682
2683 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2684         assert(u);
2685         assert(f);
2686         assert(fds);
2687
2688         if (!rt)
2689                 return 0;
2690
2691         if (rt->tmp_dir)
2692                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2693
2694         if (rt->var_tmp_dir)
2695                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2696
2697         if (rt->netns_storage_socket[0] >= 0) {
2698                 int copy;
2699
2700                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2701                 if (copy < 0)
2702                         return copy;
2703
2704                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2705         }
2706
2707         if (rt->netns_storage_socket[1] >= 0) {
2708                 int copy;
2709
2710                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2711                 if (copy < 0)
2712                         return copy;
2713
2714                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2715         }
2716
2717         return 0;
2718 }
2719
2720 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2721         int r;
2722
2723         assert(rt);
2724         assert(key);
2725         assert(value);
2726
2727         if (streq(key, "tmp-dir")) {
2728                 char *copy;
2729
2730                 r = exec_runtime_allocate(rt);
2731                 if (r < 0)
2732                         return r;
2733
2734                 copy = strdup(value);
2735                 if (!copy)
2736                         return log_oom();
2737
2738                 free((*rt)->tmp_dir);
2739                 (*rt)->tmp_dir = copy;
2740
2741         } else if (streq(key, "var-tmp-dir")) {
2742                 char *copy;
2743
2744                 r = exec_runtime_allocate(rt);
2745                 if (r < 0)
2746                         return r;
2747
2748                 copy = strdup(value);
2749                 if (!copy)
2750                         return log_oom();
2751
2752                 free((*rt)->var_tmp_dir);
2753                 (*rt)->var_tmp_dir = copy;
2754
2755         } else if (streq(key, "netns-socket-0")) {
2756                 int fd;
2757
2758                 r = exec_runtime_allocate(rt);
2759                 if (r < 0)
2760                         return r;
2761
2762                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2763                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2764                 else {
2765                         safe_close((*rt)->netns_storage_socket[0]);
2766                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2767                 }
2768         } else if (streq(key, "netns-socket-1")) {
2769                 int fd;
2770
2771                 r = exec_runtime_allocate(rt);
2772                 if (r < 0)
2773                         return r;
2774
2775                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2776                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2777                 else {
2778                         safe_close((*rt)->netns_storage_socket[1]);
2779                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2780                 }
2781         } else
2782                 return 0;
2783
2784         return 1;
2785 }
2786
2787 static void *remove_tmpdir_thread(void *p) {
2788         _cleanup_free_ char *path = p;
2789
2790         rm_rf_dangerous(path, false, true, false);
2791         return NULL;
2792 }
2793
2794 void exec_runtime_destroy(ExecRuntime *rt) {
2795         int r;
2796
2797         if (!rt)
2798                 return;
2799
2800         /* If there are multiple users of this, let's leave the stuff around */
2801         if (rt->n_ref > 1)
2802                 return;
2803
2804         if (rt->tmp_dir) {
2805                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2806
2807                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2808                 if (r < 0) {
2809                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2810                         free(rt->tmp_dir);
2811                 }
2812
2813                 rt->tmp_dir = NULL;
2814         }
2815
2816         if (rt->var_tmp_dir) {
2817                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2818
2819                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2820                 if (r < 0) {
2821                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2822                         free(rt->var_tmp_dir);
2823                 }
2824
2825                 rt->var_tmp_dir = NULL;
2826         }
2827
2828         safe_close_pair(rt->netns_storage_socket);
2829 }
2830
2831 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2832         [EXEC_INPUT_NULL] = "null",
2833         [EXEC_INPUT_TTY] = "tty",
2834         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2835         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2836         [EXEC_INPUT_SOCKET] = "socket"
2837 };
2838
2839 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2840
2841 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2842         [EXEC_OUTPUT_INHERIT] = "inherit",
2843         [EXEC_OUTPUT_NULL] = "null",
2844         [EXEC_OUTPUT_TTY] = "tty",
2845         [EXEC_OUTPUT_SYSLOG] = "syslog",
2846         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2847         [EXEC_OUTPUT_KMSG] = "kmsg",
2848         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2849         [EXEC_OUTPUT_JOURNAL] = "journal",
2850         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2851         [EXEC_OUTPUT_SOCKET] = "socket"
2852 };
2853
2854 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);