chiark / gitweb /
340b8000d4f224667a96863f5f9bfc2891955e0b
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #ifdef HAVE_SELINUX
49 #include <selinux/selinux.h>
50 #endif
51
52 #ifdef HAVE_SECCOMP
53 #include <seccomp.h>
54 #endif
55
56 #ifdef HAVE_APPARMOR
57 #include <sys/apparmor.h>
58 #endif
59
60 #include "execute.h"
61 #include "strv.h"
62 #include "macro.h"
63 #include "capability.h"
64 #include "util.h"
65 #include "log.h"
66 #include "sd-messages.h"
67 #include "ioprio.h"
68 #include "securebits.h"
69 #include "namespace.h"
70 #include "exit-status.h"
71 #include "missing.h"
72 #include "utmp-wtmp.h"
73 #include "def.h"
74 #include "path-util.h"
75 #include "env-util.h"
76 #include "fileio.h"
77 #include "unit.h"
78 #include "async.h"
79 #include "selinux-util.h"
80 #include "errno-list.h"
81 #include "af-list.h"
82 #include "mkdir.h"
83 #include "apparmor-util.h"
84 #include "smack-util.h"
85 #include "bus-endpoint.h"
86 #include "label.h"
87 #include "cap-list.h"
88
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92
93 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
94 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
95
96 /* This assumes there is a 'tty' group */
97 #define TTY_MODE 0620
98
99 #define SNDBUF_SIZE (8*1024*1024)
100
/* Moves the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. the array occupies the range directly after
 * stdin/stdout/stderr. The array is updated in place with the new fd
 * numbers. Returns 0 on success, or a negative errno if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot was still occupied (we got a higher
                         * number): remember the first such index so that
                         * another pass starts from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
144
/* Applies the requested O_NONBLOCK state to every fd in the array and
 * clears FD_CLOEXEC on each of them. Stops at the first failure and
 * returns that helper's (negative) result; returns 0 otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be passed on to our children. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
171
172 _pure_ static const char *tty_path(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_path)
176                 return context->tty_path;
177
178         return "/dev/console";
179 }
180
181 static void exec_context_tty_reset(const ExecContext *context) {
182         assert(context);
183
184         if (context->tty_vhangup)
185                 terminal_vhangup(tty_path(context));
186
187         if (context->tty_reset)
188                 reset_terminal(tty_path(context));
189
190         if (context->tty_vt_disallocate && context->tty_path)
191                 vt_disallocate(context->tty_path);
192 }
193
194 static bool is_terminal_output(ExecOutput o) {
195         return
196                 o == EXEC_OUTPUT_TTY ||
197                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 }
201
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd. Returns nfd on success, or a
 * negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(null_fd);
        return r;
}
219
220 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225         uid_t olduid = UID_INVALID;
226         gid_t oldgid = GID_INVALID;
227         int r;
228
229         if (gid != GID_INVALID) {
230                 oldgid = getgid();
231
232                 r = setegid(gid);
233                 if (r < 0)
234                         return -errno;
235         }
236
237         if (uid != UID_INVALID) {
238                 olduid = getuid();
239
240                 r = seteuid(uid);
241                 if (r < 0) {
242                         r = -errno;
243                         goto restore_gid;
244                 }
245         }
246
247         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
248         if (r < 0)
249                 r = -errno;
250
251         /* If we fail to restore the uid or gid, things will likely
252            fail later on. This should only happen if an LSM interferes. */
253
254         if (uid != UID_INVALID)
255                 (void) seteuid(olduid);
256
257  restore_gid:
258         if (gid != GID_INVALID)
259                 (void) setegid(oldgid);
260
261         return r;
262 }
263
264 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
265         int fd, r;
266
267         assert(context);
268         assert(output < _EXEC_OUTPUT_MAX);
269         assert(ident);
270         assert(nfd >= 0);
271
272         fd = socket(AF_UNIX, SOCK_STREAM, 0);
273         if (fd < 0)
274                 return -errno;
275
276         r = connect_journal_socket(fd, uid, gid);
277         if (r < 0)
278                 return r;
279
280         if (shutdown(fd, SHUT_RD) < 0) {
281                 safe_close(fd);
282                 return -errno;
283         }
284
285         fd_inc_sndbuf(fd, SNDBUF_SIZE);
286
287         dprintf(fd,
288                 "%s\n"
289                 "%s\n"
290                 "%i\n"
291                 "%i\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n",
295                 context->syslog_identifier ? context->syslog_identifier : ident,
296                 unit_id,
297                 context->syslog_priority,
298                 !!context->syslog_level_prefix,
299                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
300                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
301                 is_terminal_output(output));
302
303         if (fd != nfd) {
304                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
305                 safe_close(fd);
306         } else
307                 r = nfd;
308
309         return r;
310 }
/* Opens the terminal at path via open_terminal() (with O_NOCTTY added
 * to the mode) and makes it available as file descriptor nfd. Returns
 * nfd on success, or a negative error code on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already the right number, nothing to duplicate */
        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(term_fd);
        return r;
}
328
329 static bool is_terminal_input(ExecInput i) {
330         return
331                 i == EXEC_INPUT_TTY ||
332                 i == EXEC_INPUT_TTY_FORCE ||
333                 i == EXEC_INPUT_TTY_FAIL;
334 }
335
336 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
337
338         if (is_terminal_input(std_input) && !apply_tty_stdin)
339                 return EXEC_INPUT_NULL;
340
341         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
342                 return EXEC_INPUT_NULL;
343
344         return std_input;
345 }
346
347 static int fixup_output(ExecOutput std_output, int socket_fd) {
348
349         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
350                 return EXEC_OUTPUT_INHERIT;
351
352         return std_output;
353 }
354
355 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
356         ExecInput i;
357
358         assert(context);
359
360         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
361
362         switch (i) {
363
364         case EXEC_INPUT_NULL:
365                 return open_null_as(O_RDONLY, STDIN_FILENO);
366
367         case EXEC_INPUT_TTY:
368         case EXEC_INPUT_TTY_FORCE:
369         case EXEC_INPUT_TTY_FAIL: {
370                 int fd, r;
371
372                 fd = acquire_terminal(tty_path(context),
373                                       i == EXEC_INPUT_TTY_FAIL,
374                                       i == EXEC_INPUT_TTY_FORCE,
375                                       false,
376                                       USEC_INFINITY);
377                 if (fd < 0)
378                         return fd;
379
380                 if (fd != STDIN_FILENO) {
381                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
382                         safe_close(fd);
383                 } else
384                         r = STDIN_FILENO;
385
386                 return r;
387         }
388
389         case EXEC_INPUT_SOCKET:
390                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
391
392         default:
393                 assert_not_reached("Unknown input type");
394         }
395 }
396
397 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
398         ExecOutput o;
399         ExecInput i;
400         int r;
401
402         assert(context);
403         assert(ident);
404
405         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
406         o = fixup_output(context->std_output, socket_fd);
407
408         if (fileno == STDERR_FILENO) {
409                 ExecOutput e;
410                 e = fixup_output(context->std_error, socket_fd);
411
412                 /* This expects the input and output are already set up */
413
414                 /* Don't change the stderr file descriptor if we inherit all
415                  * the way and are not on a tty */
416                 if (e == EXEC_OUTPUT_INHERIT &&
417                     o == EXEC_OUTPUT_INHERIT &&
418                     i == EXEC_INPUT_NULL &&
419                     !is_terminal_input(context->std_input) &&
420                     getppid () != 1)
421                         return fileno;
422
423                 /* Duplicate from stdout if possible */
424                 if (e == o || e == EXEC_OUTPUT_INHERIT)
425                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
426
427                 o = e;
428
429         } else if (o == EXEC_OUTPUT_INHERIT) {
430                 /* If input got downgraded, inherit the original value */
431                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
432                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
433
434                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
435                 if (i != EXEC_INPUT_NULL)
436                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
437
438                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
439                 if (getppid() != 1)
440                         return fileno;
441
442                 /* We need to open /dev/null here anew, to get the right access mode. */
443                 return open_null_as(O_WRONLY, fileno);
444         }
445
446         switch (o) {
447
448         case EXEC_OUTPUT_NULL:
449                 return open_null_as(O_WRONLY, fileno);
450
451         case EXEC_OUTPUT_TTY:
452                 if (is_terminal_input(i))
453                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
454
455                 /* We don't reset the terminal if this is just about output */
456                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
457
458         case EXEC_OUTPUT_SYSLOG:
459         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
460         case EXEC_OUTPUT_KMSG:
461         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
462         case EXEC_OUTPUT_JOURNAL:
463         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
464                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
465                 if (r < 0) {
466                         log_unit_struct(unit_id,
467                                         LOG_ERR,
468                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
469                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
470                                                     unit_id, strerror(-r)),
471                                         LOG_ERRNO(-r),
472                                         NULL);
473                         r = open_null_as(O_WRONLY, fileno);
474                 }
475                 return r;
476
477         case EXEC_OUTPUT_SOCKET:
478                 assert(socket_fd >= 0);
479                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481         default:
482                 assert_not_reached("Unknown error type");
483         }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487         struct stat st;
488
489         assert(fd >= 0);
490
491         /* This might fail. What matters are the results. */
492         (void) fchown(fd, uid, -1);
493         (void) fchmod(fd, TTY_MODE);
494
495         if (fstat(fd, &st) < 0)
496                 return -errno;
497
498         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499                 return -EPERM;
500
501         return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505                                int *_saved_stdout) {
506         int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508         assert(_saved_stdin);
509         assert(_saved_stdout);
510
511         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512         if (saved_stdin < 0)
513                 return -errno;
514
515         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516         if (saved_stdout < 0) {
517                 r = errno;
518                 goto fail;
519         }
520
521         fd = acquire_terminal(
522                         "/dev/console",
523                         false,
524                         false,
525                         false,
526                         DEFAULT_CONFIRM_USEC);
527         if (fd < 0) {
528                 r = fd;
529                 goto fail;
530         }
531
532         r = chown_terminal(fd, getuid());
533         if (r < 0)
534                 goto fail;
535
536         if (dup2(fd, STDIN_FILENO) < 0) {
537                 r = -errno;
538                 goto fail;
539         }
540
541         if (dup2(fd, STDOUT_FILENO) < 0) {
542                 r = -errno;
543                 goto fail;
544         }
545
546         if (fd >= 2)
547                 safe_close(fd);
548
549         *_saved_stdin = saved_stdin;
550         *_saved_stdout = saved_stdout;
551
552         return 0;
553
554 fail:
555         safe_close(saved_stdout);
556         safe_close(saved_stdin);
557         safe_close(fd);
558
559         return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563         _cleanup_close_ int fd = -1;
564         va_list ap;
565
566         assert(format);
567
568         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569         if (fd < 0)
570                 return fd;
571
572         va_start(ap, format);
573         vdprintf(fd, format, ap);
574         va_end(ap);
575
576         return 0;
577 }
578
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back onto stdin and stdout (where valid) and closes the
 * saved copies. Returns 0 on success or the negative errno of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604         int saved_stdout = -1, saved_stdin = -1, r;
605         _cleanup_free_ char *line = NULL;
606
607         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608         if (r < 0)
609                 return r;
610
611         line = exec_command_line(argv);
612         if (!line)
613                 return -ENOMEM;
614
615         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617         restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619         return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623         bool keep_groups = false;
624         int r;
625
626         assert(context);
627
628         /* Lookup and set GID and supplementary group list. Here too
629          * we avoid NSS lookups for gid=0. */
630
631         if (context->group || username) {
632
633                 if (context->group) {
634                         const char *g = context->group;
635
636                         if ((r = get_group_creds(&g, &gid)) < 0)
637                                 return r;
638                 }
639
640                 /* First step, initialize groups from /etc/groups */
641                 if (username && gid != 0) {
642                         if (initgroups(username, gid) < 0)
643                                 return -errno;
644
645                         keep_groups = true;
646                 }
647
648                 /* Second step, set our gids */
649                 if (setresgid(gid, gid, gid) < 0)
650                         return -errno;
651         }
652
653         if (context->supplementary_groups) {
654                 int ngroups_max, k;
655                 gid_t *gids;
656                 char **i;
657
658                 /* Final step, initialize any manually set supplementary groups */
659                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
660
661                 if (!(gids = new(gid_t, ngroups_max)))
662                         return -ENOMEM;
663
664                 if (keep_groups) {
665                         if ((k = getgroups(ngroups_max, gids)) < 0) {
666                                 free(gids);
667                                 return -errno;
668                         }
669                 } else
670                         k = 0;
671
672                 STRV_FOREACH(i, context->supplementary_groups) {
673                         const char *g;
674
675                         if (k >= ngroups_max) {
676                                 free(gids);
677                                 return -E2BIG;
678                         }
679
680                         g = *i;
681                         r = get_group_creds(&g, gids+k);
682                         if (r < 0) {
683                                 free(gids);
684                                 return r;
685                         }
686
687                         k++;
688                 }
689
690                 if (setgroups(k, gids) < 0) {
691                         free(gids);
692                         return -errno;
693                 }
694
695                 free(gids);
696         }
697
698         return 0;
699 }
700
701 static int enforce_user(const ExecContext *context, uid_t uid) {
702         assert(context);
703
704         /* Sets (but doesn't lookup) the uid and make sure we keep the
705          * capabilities while doing so. */
706
707         if (context->capabilities) {
708                 _cleanup_cap_free_ cap_t d = NULL;
709                 static const cap_value_t bits[] = {
710                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
711                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
712                 };
713
714                 /* First step: If we need to keep capabilities but
715                  * drop privileges we need to make sure we keep our
716                  * caps, while we drop privileges. */
717                 if (uid != 0) {
718                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
719
720                         if (prctl(PR_GET_SECUREBITS) != sb)
721                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
722                                         return -errno;
723                 }
724
725                 /* Second step: set the capabilities. This will reduce
726                  * the capabilities to the minimum we need. */
727
728                 d = cap_dup(context->capabilities);
729                 if (!d)
730                         return -errno;
731
732                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
733                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
734                         return -errno;
735
736                 if (cap_set_proc(d) < 0)
737                         return -errno;
738         }
739
740         /* Third step: actually set the uids */
741         if (setresuid(uid, uid, uid) < 0)
742                 return -errno;
743
744         /* At this point we should have all necessary capabilities but
745            are otherwise a normal user. However, the caps might got
746            corrupted due to the setresuid() so we need clean them up
747            later. This is done outside of this call. */
748
749         return 0;
750 }
751
752 #ifdef HAVE_PAM
753
/* PAM conversation callback that refuses every conversation: all
 * arguments are ignored and PAM_CONV_ERR is returned unconditionally.
 * Installed as the .conv member of the pam_conv structure used by
 * setup_pam(). */
static int null_conv(
                int num_msg,
                const struct pam_message **msg,
                struct pam_response **resp,
                void *appdata_ptr) {

        /* We don't support conversations */

        return PAM_CONV_ERR;
}
764
765 static int setup_pam(
766                 const char *name,
767                 const char *user,
768                 uid_t uid,
769                 const char *tty,
770                 char ***pam_env,
771                 int fds[], unsigned n_fds) {
772
773         static const struct pam_conv conv = {
774                 .conv = null_conv,
775                 .appdata_ptr = NULL
776         };
777
778         pam_handle_t *handle = NULL;
779         sigset_t ss, old_ss;
780         int pam_code = PAM_SUCCESS;
781         int err;
782         char **e = NULL;
783         bool close_session = false;
784         pid_t pam_pid = 0, parent_pid;
785         int flags = 0;
786
787         assert(name);
788         assert(user);
789         assert(pam_env);
790
791         /* We set up PAM in the parent process, then fork. The child
792          * will then stay around until killed via PR_GET_PDEATHSIG or
793          * systemd via the cgroup logic. It will then remove the PAM
794          * session again. The parent process will exec() the actual
795          * daemon. We do things this way to ensure that the main PID
796          * of the daemon is the one we initially fork()ed. */
797
798         if (log_get_max_level() < LOG_DEBUG)
799                 flags |= PAM_SILENT;
800
801         pam_code = pam_start(name, user, &conv, &handle);
802         if (pam_code != PAM_SUCCESS) {
803                 handle = NULL;
804                 goto fail;
805         }
806
807         if (tty) {
808                 pam_code = pam_set_item(handle, PAM_TTY, tty);
809                 if (pam_code != PAM_SUCCESS)
810                         goto fail;
811         }
812
813         pam_code = pam_acct_mgmt(handle, flags);
814         if (pam_code != PAM_SUCCESS)
815                 goto fail;
816
817         pam_code = pam_open_session(handle, flags);
818         if (pam_code != PAM_SUCCESS)
819                 goto fail;
820
821         close_session = true;
822
823         e = pam_getenvlist(handle);
824         if (!e) {
825                 pam_code = PAM_BUF_ERR;
826                 goto fail;
827         }
828
829         /* Block SIGTERM, so that we know that it won't get lost in
830          * the child */
831         if (sigemptyset(&ss) < 0 ||
832             sigaddset(&ss, SIGTERM) < 0 ||
833             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
834                 goto fail;
835
836         parent_pid = getpid();
837
838         pam_pid = fork();
839         if (pam_pid < 0)
840                 goto fail;
841
842         if (pam_pid == 0) {
843                 int sig;
844                 int r = EXIT_PAM;
845
846                 /* The child's job is to reset the PAM session on
847                  * termination */
848
849                 /* This string must fit in 10 chars (i.e. the length
850                  * of "/sbin/init"), to look pretty in /bin/ps */
851                 rename_process("(sd-pam)");
852
853                 /* Make sure we don't keep open the passed fds in this
854                 child. We assume that otherwise only those fds are
855                 open here that have been opened by PAM. */
856                 close_many(fds, n_fds);
857
858                 /* Drop privileges - we don't need any to pam_close_session
859                  * and this will make PR_SET_PDEATHSIG work in most cases.
860                  * If this fails, ignore the error - but expect sd-pam threads
861                  * to fail to exit normally */
862                 if (setresuid(uid, uid, uid) < 0)
863                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
864
865                 /* Wait until our parent died. This will only work if
866                  * the above setresuid() succeeds, otherwise the kernel
867                  * will not allow unprivileged parents kill their privileged
868                  * children this way. We rely on the control groups kill logic
869                  * to do the rest for us. */
870                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
871                         goto child_finish;
872
873                 /* Check if our parent process might already have
874                  * died? */
875                 if (getppid() == parent_pid) {
876                         for (;;) {
877                                 if (sigwait(&ss, &sig) < 0) {
878                                         if (errno == EINTR)
879                                                 continue;
880
881                                         goto child_finish;
882                                 }
883
884                                 assert(sig == SIGTERM);
885                                 break;
886                         }
887                 }
888
889                 /* If our parent died we'll end the session */
890                 if (getppid() != parent_pid) {
891                         pam_code = pam_close_session(handle, flags);
892                         if (pam_code != PAM_SUCCESS)
893                                 goto child_finish;
894                 }
895
896                 r = 0;
897
898         child_finish:
899                 pam_end(handle, pam_code | flags);
900                 _exit(r);
901         }
902
903         /* If the child was forked off successfully it will do all the
904          * cleanups, so forget about the handle here. */
905         handle = NULL;
906
907         /* Unblock SIGTERM again in the parent */
908         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
909                 goto fail;
910
911         /* We close the log explicitly here, since the PAM modules
912          * might have opened it, but we don't want this fd around. */
913         closelog();
914
915         *pam_env = e;
916         e = NULL;
917
918         return 0;
919
920 fail:
921         if (pam_code != PAM_SUCCESS) {
922                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
923                 err = -EPERM;  /* PAM errors do not map to errno */
924         } else {
925                 log_error_errno(errno, "PAM failed: %m");
926                 err = -errno;
927         }
928
929         if (handle) {
930                 if (close_session)
931                         pam_code = pam_close_session(handle, flags);
932
933                 pam_end(handle, pam_code | flags);
934         }
935
936         strv_free(e);
937
938         closelog();
939
940         if (pam_pid > 1) {
941                 kill(pam_pid, SIGTERM);
942                 kill(pam_pid, SIGCONT);
943         }
944
945         return err;
946 }
947 #endif
948
/* Renames the calling process to the base name of path, wrapped in
 * parentheses. Only the last 8 characters of the base name are used,
 * so that the result never exceeds 10 characters (the length of
 * "/sbin/init") and looks pretty in /bin/ps. If the base name is
 * empty, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char decorated[1 + 8 + 1 + 1], *w = decorated;
        const char *base, *tail;
        size_t n;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* The end of the name is usually the more interesting part,
         * since the beginning might just be "systemd-" */
        n = strlen(base);
        if (n > 8) {
                tail = base + n - 8;
                n = 8;
        } else
                tail = base;

        *w++ = '(';
        memcpy(w, tail, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(decorated);
}
979
980 #ifdef HAVE_SECCOMP
981
982 static int apply_seccomp(const ExecContext *c) {
983         uint32_t negative_action, action;
984         scmp_filter_ctx *seccomp;
985         Iterator i;
986         void *id;
987         int r;
988
989         assert(c);
990
991         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
992
993         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
994         if (!seccomp)
995                 return -ENOMEM;
996
997         if (c->syscall_archs) {
998
999                 SET_FOREACH(id, c->syscall_archs, i) {
1000                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1001                         if (r == -EEXIST)
1002                                 continue;
1003                         if (r < 0)
1004                                 goto finish;
1005                 }
1006
1007         } else {
1008                 r = seccomp_add_secondary_archs(seccomp);
1009                 if (r < 0)
1010                         goto finish;
1011         }
1012
1013         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1014         SET_FOREACH(id, c->syscall_filter, i) {
1015                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1016                 if (r < 0)
1017                         goto finish;
1018         }
1019
1020         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1021         if (r < 0)
1022                 goto finish;
1023
1024         r = seccomp_load(seccomp);
1025
1026 finish:
1027         seccomp_release(seccomp);
1028         return r;
1029 }
1030
1031 static int apply_address_families(const ExecContext *c) {
1032         scmp_filter_ctx *seccomp;
1033         Iterator i;
1034         int r;
1035
1036         assert(c);
1037
1038         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1039         if (!seccomp)
1040                 return -ENOMEM;
1041
1042         r = seccomp_add_secondary_archs(seccomp);
1043         if (r < 0)
1044                 goto finish;
1045
1046         if (c->address_families_whitelist) {
1047                 int af, first = 0, last = 0;
1048                 void *afp;
1049
1050                 /* If this is a whitelist, we first block the address
1051                  * families that are out of range and then everything
1052                  * that is not in the set. First, we find the lowest
1053                  * and highest address family in the set. */
1054
1055                 SET_FOREACH(afp, c->address_families, i) {
1056                         af = PTR_TO_INT(afp);
1057
1058                         if (af <= 0 || af >= af_max())
1059                                 continue;
1060
1061                         if (first == 0 || af < first)
1062                                 first = af;
1063
1064                         if (last == 0 || af > last)
1065                                 last = af;
1066                 }
1067
1068                 assert((first == 0) == (last == 0));
1069
1070                 if (first == 0) {
1071
1072                         /* No entries in the valid range, block everything */
1073                         r = seccomp_rule_add(
1074                                         seccomp,
1075                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076                                         SCMP_SYS(socket),
1077                                         0);
1078                         if (r < 0)
1079                                 goto finish;
1080
1081                 } else {
1082
1083                         /* Block everything below the first entry */
1084                         r = seccomp_rule_add(
1085                                         seccomp,
1086                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1087                                         SCMP_SYS(socket),
1088                                         1,
1089                                         SCMP_A0(SCMP_CMP_LT, first));
1090                         if (r < 0)
1091                                 goto finish;
1092
1093                         /* Block everything above the last entry */
1094                         r = seccomp_rule_add(
1095                                         seccomp,
1096                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097                                         SCMP_SYS(socket),
1098                                         1,
1099                                         SCMP_A0(SCMP_CMP_GT, last));
1100                         if (r < 0)
1101                                 goto finish;
1102
1103                         /* Block everything between the first and last
1104                          * entry */
1105                         for (af = 1; af < af_max(); af++) {
1106
1107                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1108                                         continue;
1109
1110                                 r = seccomp_rule_add(
1111                                                 seccomp,
1112                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1113                                                 SCMP_SYS(socket),
1114                                                 1,
1115                                                 SCMP_A0(SCMP_CMP_EQ, af));
1116                                 if (r < 0)
1117                                         goto finish;
1118                         }
1119                 }
1120
1121         } else {
1122                 void *af;
1123
1124                 /* If this is a blacklist, then generate one rule for
1125                  * each address family that are then combined in OR
1126                  * checks. */
1127
1128                 SET_FOREACH(af, c->address_families, i) {
1129
1130                         r = seccomp_rule_add(
1131                                         seccomp,
1132                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1133                                         SCMP_SYS(socket),
1134                                         1,
1135                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1136                         if (r < 0)
1137                                 goto finish;
1138                 }
1139         }
1140
1141         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1142         if (r < 0)
1143                 goto finish;
1144
1145         r = seccomp_load(seccomp);
1146
1147 finish:
1148         seccomp_release(seccomp);
1149         return r;
1150 }
1151
1152 #endif
1153
1154 static void do_idle_pipe_dance(int idle_pipe[4]) {
1155         assert(idle_pipe);
1156
1157
1158         safe_close(idle_pipe[1]);
1159         safe_close(idle_pipe[2]);
1160
1161         if (idle_pipe[0] >= 0) {
1162                 int r;
1163
1164                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1165
1166                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1167                         /* Signal systemd that we are bored and want to continue. */
1168                         write(idle_pipe[3], "x", 1);
1169
1170                         /* Wait for systemd to react to the signal above. */
1171                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1172                 }
1173
1174                 safe_close(idle_pipe[0]);
1175
1176         }
1177
1178         safe_close(idle_pipe[3]);
1179 }
1180
1181 static int build_environment(
1182                 const ExecContext *c,
1183                 unsigned n_fds,
1184                 usec_t watchdog_usec,
1185                 const char *home,
1186                 const char *username,
1187                 const char *shell,
1188                 char ***ret) {
1189
1190         _cleanup_strv_free_ char **our_env = NULL;
1191         unsigned n_env = 0;
1192         char *x;
1193
1194         assert(c);
1195         assert(ret);
1196
1197         our_env = new0(char*, 10);
1198         if (!our_env)
1199                 return -ENOMEM;
1200
1201         if (n_fds > 0) {
1202                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205
1206                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209         }
1210
1211         if (watchdog_usec > 0) {
1212                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215
1216                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         if (home) {
1222                 x = strappend("HOME=", home);
1223                 if (!x)
1224                         return -ENOMEM;
1225                 our_env[n_env++] = x;
1226         }
1227
1228         if (username) {
1229                 x = strappend("LOGNAME=", username);
1230                 if (!x)
1231                         return -ENOMEM;
1232                 our_env[n_env++] = x;
1233
1234                 x = strappend("USER=", username);
1235                 if (!x)
1236                         return -ENOMEM;
1237                 our_env[n_env++] = x;
1238         }
1239
1240         if (shell) {
1241                 x = strappend("SHELL=", shell);
1242                 if (!x)
1243                         return -ENOMEM;
1244                 our_env[n_env++] = x;
1245         }
1246
1247         if (is_terminal_input(c->std_input) ||
1248             c->std_output == EXEC_OUTPUT_TTY ||
1249             c->std_error == EXEC_OUTPUT_TTY ||
1250             c->tty_path) {
1251
1252                 x = strdup(default_term_for_tty(tty_path(c)));
1253                 if (!x)
1254                         return -ENOMEM;
1255                 our_env[n_env++] = x;
1256         }
1257
1258         our_env[n_env++] = NULL;
1259         assert(n_env <= 10);
1260
1261         *ret = our_env;
1262         our_env = NULL;
1263
1264         return 0;
1265 }
1266
1267 static int exec_child(
1268                 ExecCommand *command,
1269                 const ExecContext *context,
1270                 const ExecParameters *params,
1271                 ExecRuntime *runtime,
1272                 char **argv,
1273                 int socket_fd,
1274                 int *fds, unsigned n_fds,
1275                 char **files_env,
1276                 int *exit_status) {
1277
1278         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1279         _cleanup_free_ char *mac_selinux_context_net = NULL;
1280         const char *username = NULL, *home = NULL, *shell = NULL;
1281         unsigned n_dont_close = 0;
1282         int dont_close[n_fds + 4];
1283         uid_t uid = UID_INVALID;
1284         gid_t gid = GID_INVALID;
1285         int i, r;
1286
1287         assert(command);
1288         assert(context);
1289         assert(params);
1290         assert(exit_status);
1291
1292         rename_process_from_path(command->path);
1293
1294         /* We reset exactly these signals, since they are the
1295          * only ones we set to SIG_IGN in the main daemon. All
1296          * others we leave untouched because we set them to
1297          * SIG_DFL or a valid handler initially, both of which
1298          * will be demoted to SIG_DFL. */
1299         default_signals(SIGNALS_CRASH_HANDLER,
1300                         SIGNALS_IGNORE, -1);
1301
1302         if (context->ignore_sigpipe)
1303                 ignore_signals(SIGPIPE, -1);
1304
1305         r = reset_signal_mask();
1306         if (r < 0) {
1307                 *exit_status = EXIT_SIGNAL_MASK;
1308                 return r;
1309         }
1310
1311         if (params->idle_pipe)
1312                 do_idle_pipe_dance(params->idle_pipe);
1313
1314         /* Close sockets very early to make sure we don't
1315          * block init reexecution because it cannot bind its
1316          * sockets */
1317
1318         log_forget_fds();
1319
1320         if (socket_fd >= 0)
1321                 dont_close[n_dont_close++] = socket_fd;
1322         if (n_fds > 0) {
1323                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324                 n_dont_close += n_fds;
1325         }
1326         if (params->bus_endpoint_fd >= 0)
1327                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1328         if (runtime) {
1329                 if (runtime->netns_storage_socket[0] >= 0)
1330                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331                 if (runtime->netns_storage_socket[1] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1333         }
1334
1335         r = close_all_fds(dont_close, n_dont_close);
1336         if (r < 0) {
1337                 *exit_status = EXIT_FDS;
1338                 return r;
1339         }
1340
1341         if (!context->same_pgrp)
1342                 if (setsid() < 0) {
1343                         *exit_status = EXIT_SETSID;
1344                         return -errno;
1345                 }
1346
1347         exec_context_tty_reset(context);
1348
1349         if (params->confirm_spawn) {
1350                 char response;
1351
1352                 r = ask_for_confirmation(&response, argv);
1353                 if (r == -ETIMEDOUT)
1354                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1355                 else if (r < 0)
1356                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1357                 else if (response == 's') {
1358                         write_confirm_message("Skipping execution.\n");
1359                         *exit_status = EXIT_CONFIRM;
1360                         return -ECANCELED;
1361                 } else if (response == 'n') {
1362                         write_confirm_message("Failing execution.\n");
1363                         *exit_status = 0;
1364                         return 0;
1365                 }
1366         }
1367
1368         if (context->user) {
1369                 username = context->user;
1370                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                 if (r < 0) {
1372                         *exit_status = EXIT_USER;
1373                         return r;
1374                 }
1375         }
1376
1377         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378          * must be sure to drop O_NONBLOCK */
1379         if (socket_fd >= 0)
1380                 fd_nonblock(socket_fd, false);
1381
1382         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1383         if (r < 0) {
1384                 *exit_status = EXIT_STDIN;
1385                 return r;
1386         }
1387
1388         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (r < 0) {
1390                 *exit_status = EXIT_STDOUT;
1391                 return r;
1392         }
1393
1394         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1395         if (r < 0) {
1396                 *exit_status = EXIT_STDERR;
1397                 return r;
1398         }
1399
1400         if (params->cgroup_path) {
1401                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1402                 if (r < 0) {
1403                         *exit_status = EXIT_CGROUP;
1404                         return r;
1405                 }
1406         }
1407
1408         if (context->oom_score_adjust_set) {
1409                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1410
1411                 /* When we can't make this change due to EPERM, then
1412                  * let's silently skip over it. User namespaces
1413                  * prohibit write access to this file, and we
1414                  * shouldn't trip up over that. */
1415
1416                 sprintf(t, "%i", context->oom_score_adjust);
1417                 r = write_string_file("/proc/self/oom_score_adj", t);
1418                 if (r == -EPERM || r == -EACCES) {
1419                         log_open();
1420                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1421                         log_close();
1422                 } else if (r < 0) {
1423                         *exit_status = EXIT_OOM_ADJUST;
1424                         return -errno;
1425                 }
1426         }
1427
1428         if (context->nice_set)
1429                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1430                         *exit_status = EXIT_NICE;
1431                         return -errno;
1432                 }
1433
1434         if (context->cpu_sched_set) {
1435                 struct sched_param param = {
1436                         .sched_priority = context->cpu_sched_priority,
1437                 };
1438
1439                 r = sched_setscheduler(0,
1440                                        context->cpu_sched_policy |
1441                                        (context->cpu_sched_reset_on_fork ?
1442                                         SCHED_RESET_ON_FORK : 0),
1443                                        &param);
1444                 if (r < 0) {
1445                         *exit_status = EXIT_SETSCHEDULER;
1446                         return -errno;
1447                 }
1448         }
1449
1450         if (context->cpuset)
1451                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1452                         *exit_status = EXIT_CPUAFFINITY;
1453                         return -errno;
1454                 }
1455
1456         if (context->ioprio_set)
1457                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1458                         *exit_status = EXIT_IOPRIO;
1459                         return -errno;
1460                 }
1461
1462         if (context->timer_slack_nsec != NSEC_INFINITY)
1463                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1464                         *exit_status = EXIT_TIMERSLACK;
1465                         return -errno;
1466                 }
1467
1468         if (context->personality != 0xffffffffUL)
1469                 if (personality(context->personality) < 0) {
1470                         *exit_status = EXIT_PERSONALITY;
1471                         return -errno;
1472                 }
1473
1474         if (context->utmp_id)
1475                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1476
1477         if (context->user && is_terminal_input(context->std_input)) {
1478                 r = chown_terminal(STDIN_FILENO, uid);
1479                 if (r < 0) {
1480                         *exit_status = EXIT_STDIN;
1481                         return r;
1482                 }
1483         }
1484
1485 #ifdef ENABLE_KDBUS
1486         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1487                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1488
1489                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1490                 if (r < 0) {
1491                         *exit_status = EXIT_BUS_ENDPOINT;
1492                         return r;
1493                 }
1494         }
1495 #endif
1496
1497         /* If delegation is enabled we'll pass ownership of the cgroup
1498          * (but only in systemd's own controller hierarchy!) to the
1499          * user of the new process. */
1500         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1501                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1502                 if (r < 0) {
1503                         *exit_status = EXIT_CGROUP;
1504                         return r;
1505                 }
1506
1507
1508                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1509                 if (r < 0) {
1510                         *exit_status = EXIT_CGROUP;
1511                         return r;
1512                 }
1513         }
1514
1515         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1516                 char **rt;
1517
1518                 STRV_FOREACH(rt, context->runtime_directory) {
1519                         _cleanup_free_ char *p;
1520
1521                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1522                         if (!p) {
1523                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1524                                 return -ENOMEM;
1525                         }
1526
1527                         r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1528                         if (r < 0) {
1529                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1530                                 return r;
1531                         }
1532                 }
1533         }
1534
1535         if (params->apply_permissions) {
1536                 r = enforce_groups(context, username, gid);
1537                 if (r < 0) {
1538                         *exit_status = EXIT_GROUP;
1539                         return r;
1540                 }
1541         }
1542
1543         umask(context->umask);
1544
1545 #ifdef HAVE_PAM
1546         if (params->apply_permissions && context->pam_name && username) {
1547                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1548                 if (r < 0) {
1549                         *exit_status = EXIT_PAM;
1550                         return r;
1551                 }
1552         }
1553 #endif
1554
1555         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1556                 r = setup_netns(runtime->netns_storage_socket);
1557                 if (r < 0) {
1558                         *exit_status = EXIT_NETWORK;
1559                         return r;
1560                 }
1561         }
1562
1563         if (!strv_isempty(context->read_write_dirs) ||
1564             !strv_isempty(context->read_only_dirs) ||
1565             !strv_isempty(context->inaccessible_dirs) ||
1566             context->mount_flags != 0 ||
1567             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1568             params->bus_endpoint_path ||
1569             context->private_devices ||
1570             context->protect_system != PROTECT_SYSTEM_NO ||
1571             context->protect_home != PROTECT_HOME_NO) {
1572
1573                 char *tmp = NULL, *var = NULL;
1574
1575                 /* The runtime struct only contains the parent
1576                  * of the private /tmp, which is
1577                  * non-accessible to world users. Inside of it
1578                  * there's a /tmp that is sticky, and that's
1579                  * the one we want to use here. */
1580
1581                 if (context->private_tmp && runtime) {
1582                         if (runtime->tmp_dir)
1583                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1584                         if (runtime->var_tmp_dir)
1585                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1586                 }
1587
1588                 r = setup_namespace(
1589                                 context->read_write_dirs,
1590                                 context->read_only_dirs,
1591                                 context->inaccessible_dirs,
1592                                 tmp,
1593                                 var,
1594                                 params->bus_endpoint_path,
1595                                 context->private_devices,
1596                                 context->protect_home,
1597                                 context->protect_system,
1598                                 context->mount_flags);
1599
1600                 /* If we couldn't set up the namespace this is
1601                  * probably due to a missing capability. In this case,
1602                  * silently proceed. */
1603                 if (r == -EPERM || r == -EACCES) {
1604                         log_open();
1605                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1606                         log_close();
1607                 } else if (r < 0) {
1608                         *exit_status = EXIT_NAMESPACE;
1609                         return r;
1610                 }
1611         }
1612
1613         if (params->apply_chroot) {
1614                 if (context->root_directory)
1615                         if (chroot(context->root_directory) < 0) {
1616                                 *exit_status = EXIT_CHROOT;
1617                                 return -errno;
1618                         }
1619
1620                 if (chdir(context->working_directory ?: "/") < 0 &&
1621                     !context->working_directory_missing_ok) {
1622                         *exit_status = EXIT_CHDIR;
1623                         return -errno;
1624                 }
1625         } else {
1626                 _cleanup_free_ char *d = NULL;
1627
1628                 if (asprintf(&d, "%s/%s",
1629                              context->root_directory ?: "",
1630                              context->working_directory ?: "") < 0) {
1631                         *exit_status = EXIT_MEMORY;
1632                         return -ENOMEM;
1633                 }
1634
1635                 if (chdir(d) < 0 &&
1636                     !context->working_directory_missing_ok) {
1637                         *exit_status = EXIT_CHDIR;
1638                         return -errno;
1639                 }
1640         }
1641
1642 #ifdef HAVE_SELINUX
1643         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1644                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1645                 if (r < 0) {
1646                         *exit_status = EXIT_SELINUX_CONTEXT;
1647                         return r;
1648                 }
1649         }
1650 #endif
1651
1652         /* We repeat the fd closing here, to make sure that
1653          * nothing is leaked from the PAM modules. Note that
1654          * we are more aggressive this time since socket_fd
1655          * and the netns fds we don't need anymore. The custom
1656          * endpoint fd was needed to upload the policy and can
1657          * now be closed as well. */
1658         r = close_all_fds(fds, n_fds);
1659         if (r >= 0)
1660                 r = shift_fds(fds, n_fds);
1661         if (r >= 0)
1662                 r = flags_fds(fds, n_fds, context->non_blocking);
1663         if (r < 0) {
1664                 *exit_status = EXIT_FDS;
1665                 return r;
1666         }
1667
1668         if (params->apply_permissions) {
1669
1670                 for (i = 0; i < _RLIMIT_MAX; i++) {
1671                         if (!context->rlimit[i])
1672                                 continue;
1673
1674                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1675                                 *exit_status = EXIT_LIMITS;
1676                                 return -errno;
1677                         }
1678                 }
1679
1680                 if (context->capability_bounding_set_drop) {
1681                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1682                         if (r < 0) {
1683                                 *exit_status = EXIT_CAPABILITIES;
1684                                 return r;
1685                         }
1686                 }
1687
1688 #ifdef HAVE_SMACK
1689                 if (context->smack_process_label) {
1690                         r = mac_smack_apply_pid(0, context->smack_process_label);
1691                         if (r < 0) {
1692                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1693                                 return r;
1694                         }
1695                 }
1696 #endif
1697
1698                 if (context->user) {
1699                         r = enforce_user(context, uid);
1700                         if (r < 0) {
1701                                 *exit_status = EXIT_USER;
1702                                 return r;
1703                         }
1704                 }
1705
1706                 /* PR_GET_SECUREBITS is not privileged, while
1707                  * PR_SET_SECUREBITS is. So to suppress
1708                  * potential EPERMs we'll try not to call
1709                  * PR_SET_SECUREBITS unless necessary. */
1710                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1711                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1712                                 *exit_status = EXIT_SECUREBITS;
1713                                 return -errno;
1714                         }
1715
1716                 if (context->capabilities)
1717                         if (cap_set_proc(context->capabilities) < 0) {
1718                                 *exit_status = EXIT_CAPABILITIES;
1719                                 return -errno;
1720                         }
1721
1722                 if (context->no_new_privileges)
1723                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1724                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1725                                 return -errno;
1726                         }
1727
1728 #ifdef HAVE_SECCOMP
1729                 if (context->address_families_whitelist ||
1730                     !set_isempty(context->address_families)) {
1731                         r = apply_address_families(context);
1732                         if (r < 0) {
1733                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1734                                 return r;
1735                         }
1736                 }
1737
1738                 if (context->syscall_whitelist ||
1739                     !set_isempty(context->syscall_filter) ||
1740                     !set_isempty(context->syscall_archs)) {
1741                         r = apply_seccomp(context);
1742                         if (r < 0) {
1743                                 *exit_status = EXIT_SECCOMP;
1744                                 return r;
1745                         }
1746                 }
1747 #endif
1748
1749 #ifdef HAVE_SELINUX
1750                 if (mac_selinux_use()) {
1751                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1752
1753                         if (exec_context) {
1754                                 r = setexeccon(exec_context);
1755                                 if (r < 0) {
1756                                         *exit_status = EXIT_SELINUX_CONTEXT;
1757                                         return r;
1758                                 }
1759                         }
1760                 }
1761 #endif
1762
1763 #ifdef HAVE_APPARMOR
1764                 if (context->apparmor_profile && mac_apparmor_use()) {
1765                         r = aa_change_onexec(context->apparmor_profile);
1766                         if (r < 0 && !context->apparmor_profile_ignore) {
1767                                 *exit_status = EXIT_APPARMOR_PROFILE;
1768                                 return -errno;
1769                         }
1770                 }
1771 #endif
1772         }
1773
1774         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1775         if (r < 0) {
1776                 *exit_status = EXIT_MEMORY;
1777                 return r;
1778         }
1779
1780         final_env = strv_env_merge(5,
1781                                    params->environment,
1782                                    our_env,
1783                                    context->environment,
1784                                    files_env,
1785                                    pam_env,
1786                                    NULL);
1787         if (!final_env) {
1788                 *exit_status = EXIT_MEMORY;
1789                 return -ENOMEM;
1790         }
1791
1792         final_argv = replace_env_argv(argv, final_env);
1793         if (!final_argv) {
1794                 *exit_status = EXIT_MEMORY;
1795                 return -ENOMEM;
1796         }
1797
1798         final_env = strv_env_clean(final_env);
1799
1800         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1801                 _cleanup_free_ char *line;
1802
1803                 line = exec_command_line(final_argv);
1804                 if (line) {
1805                         log_open();
1806                         log_unit_struct(params->unit_id,
1807                                         LOG_DEBUG,
1808                                         "EXECUTABLE=%s", command->path,
1809                                         LOG_MESSAGE("Executing: %s", line),
1810                                         NULL);
1811                         log_close();
1812                 }
1813         }
1814         execve(command->path, final_argv, final_env);
1815         *exit_status = EXIT_EXEC;
1816         return -errno;
1817 }
1818
1819 int exec_spawn(ExecCommand *command,
1820                const ExecContext *context,
1821                const ExecParameters *params,
1822                ExecRuntime *runtime,
1823                pid_t *ret) {
1824
1825         _cleanup_strv_free_ char **files_env = NULL;
1826         int *fds = NULL; unsigned n_fds = 0;
1827         _cleanup_free_ char *line = NULL;
1828         int socket_fd, r;
1829         char **argv;
1830         pid_t pid;
1831
1832         assert(command);
1833         assert(context);
1834         assert(ret);
1835         assert(params);
1836         assert(params->fds || params->n_fds <= 0);
1837
1838         if (context->std_input == EXEC_INPUT_SOCKET ||
1839             context->std_output == EXEC_OUTPUT_SOCKET ||
1840             context->std_error == EXEC_OUTPUT_SOCKET) {
1841
1842                 if (params->n_fds != 1) {
1843                         log_unit_error(params->unit_id, "Got more than one socket.");
1844                         return -EINVAL;
1845                 }
1846
1847                 socket_fd = params->fds[0];
1848         } else {
1849                 socket_fd = -1;
1850                 fds = params->fds;
1851                 n_fds = params->n_fds;
1852         }
1853
1854         r = exec_context_load_environment(context, params->unit_id, &files_env);
1855         if (r < 0)
1856                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1857
1858         argv = params->argv ?: command->argv;
1859         line = exec_command_line(argv);
1860         if (!line)
1861                 return log_oom();
1862
1863         log_unit_struct(params->unit_id,
1864                         LOG_DEBUG,
1865                         "EXECUTABLE=%s", command->path,
1866                         LOG_MESSAGE("About to execute: %s", line),
1867                         NULL);
1868         pid = fork();
1869         if (pid < 0)
1870                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1871
1872         if (pid == 0) {
1873                 int exit_status;
1874
1875                 r = exec_child(command,
1876                                context,
1877                                params,
1878                                runtime,
1879                                argv,
1880                                socket_fd,
1881                                fds, n_fds,
1882                                files_env,
1883                                &exit_status);
1884                 if (r < 0) {
1885                         log_open();
1886                         log_unit_struct(params->unit_id,
1887                                         LOG_ERR,
1888                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1889                                         "EXECUTABLE=%s", command->path,
1890                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1891                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1892                                                     command->path, strerror(-r)),
1893                                         LOG_ERRNO(r),
1894                                         NULL);
1895                 }
1896
1897                 _exit(exit_status);
1898         }
1899
1900         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1901
1902         /* We add the new process to the cgroup both in the child (so
1903          * that we can be sure that no user code is ever executed
1904          * outside of the cgroup) and in the parent (so that we can be
1905          * sure that when we kill the cgroup the process will be
1906          * killed too). */
1907         if (params->cgroup_path)
1908                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1909
1910         exec_status_start(&command->exec_status, pid);
1911
1912         *ret = pid;
1913         return 0;
1914 }
1915
1916 void exec_context_init(ExecContext *c) {
1917         assert(c);
1918
1919         c->umask = 0022;
1920         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1921         c->cpu_sched_policy = SCHED_OTHER;
1922         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1923         c->syslog_level_prefix = true;
1924         c->ignore_sigpipe = true;
1925         c->timer_slack_nsec = NSEC_INFINITY;
1926         c->personality = 0xffffffffUL;
1927         c->runtime_directory_mode = 0755;
1928 }
1929
1930 void exec_context_done(ExecContext *c) {
1931         unsigned l;
1932
1933         assert(c);
1934
1935         strv_free(c->environment);
1936         c->environment = NULL;
1937
1938         strv_free(c->environment_files);
1939         c->environment_files = NULL;
1940
1941         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1942                 free(c->rlimit[l]);
1943                 c->rlimit[l] = NULL;
1944         }
1945
1946         free(c->working_directory);
1947         c->working_directory = NULL;
1948         free(c->root_directory);
1949         c->root_directory = NULL;
1950
1951         free(c->tty_path);
1952         c->tty_path = NULL;
1953
1954         free(c->syslog_identifier);
1955         c->syslog_identifier = NULL;
1956
1957         free(c->user);
1958         c->user = NULL;
1959
1960         free(c->group);
1961         c->group = NULL;
1962
1963         strv_free(c->supplementary_groups);
1964         c->supplementary_groups = NULL;
1965
1966         free(c->pam_name);
1967         c->pam_name = NULL;
1968
1969         if (c->capabilities) {
1970                 cap_free(c->capabilities);
1971                 c->capabilities = NULL;
1972         }
1973
1974         strv_free(c->read_only_dirs);
1975         c->read_only_dirs = NULL;
1976
1977         strv_free(c->read_write_dirs);
1978         c->read_write_dirs = NULL;
1979
1980         strv_free(c->inaccessible_dirs);
1981         c->inaccessible_dirs = NULL;
1982
1983         if (c->cpuset)
1984                 CPU_FREE(c->cpuset);
1985
1986         free(c->utmp_id);
1987         c->utmp_id = NULL;
1988
1989         free(c->selinux_context);
1990         c->selinux_context = NULL;
1991
1992         free(c->apparmor_profile);
1993         c->apparmor_profile = NULL;
1994
1995         set_free(c->syscall_filter);
1996         c->syscall_filter = NULL;
1997
1998         set_free(c->syscall_archs);
1999         c->syscall_archs = NULL;
2000
2001         set_free(c->address_families);
2002         c->address_families = NULL;
2003
2004         strv_free(c->runtime_directory);
2005         c->runtime_directory = NULL;
2006
2007         bus_endpoint_free(c->bus_endpoint);
2008         c->bus_endpoint = NULL;
2009 }
2010
2011 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2012         char **i;
2013
2014         assert(c);
2015
2016         if (!runtime_prefix)
2017                 return 0;
2018
2019         STRV_FOREACH(i, c->runtime_directory) {
2020                 _cleanup_free_ char *p;
2021
2022                 p = strjoin(runtime_prefix, "/", *i, NULL);
2023                 if (!p)
2024                         return -ENOMEM;
2025
2026                 /* We execute this synchronously, since we need to be
2027                  * sure this is gone when we start the service
2028                  * next. */
2029                 rm_rf(p, false, true, false);
2030         }
2031
2032         return 0;
2033 }
2034
2035 void exec_command_done(ExecCommand *c) {
2036         assert(c);
2037
2038         free(c->path);
2039         c->path = NULL;
2040
2041         strv_free(c->argv);
2042         c->argv = NULL;
2043 }
2044
2045 void exec_command_done_array(ExecCommand *c, unsigned n) {
2046         unsigned i;
2047
2048         for (i = 0; i < n; i++)
2049                 exec_command_done(c+i);
2050 }
2051
2052 ExecCommand* exec_command_free_list(ExecCommand *c) {
2053         ExecCommand *i;
2054
2055         while ((i = c)) {
2056                 LIST_REMOVE(command, c, i);
2057                 exec_command_done(i);
2058                 free(i);
2059         }
2060
2061         return NULL;
2062 }
2063
2064 void exec_command_free_array(ExecCommand **c, unsigned n) {
2065         unsigned i;
2066
2067         for (i = 0; i < n; i++)
2068                 c[i] = exec_command_free_list(c[i]);
2069 }
2070
/* Context passed as userdata to invalid_env() */
struct InvalidEnvInfo {
        const char *unit_id; /* unit the log message is attributed to */
        const char *path;    /* file the invalid assignment was read from */
};

typedef struct InvalidEnvInfo InvalidEnvInfo;
2075
2076 static void invalid_env(const char *p, void *userdata) {
2077         InvalidEnvInfo *info = userdata;
2078
2079         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2080 }
2081
2082 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2083         char **i, **r = NULL;
2084
2085         assert(c);
2086         assert(l);
2087
2088         STRV_FOREACH(i, c->environment_files) {
2089                 char *fn;
2090                 int k;
2091                 bool ignore = false;
2092                 char **p;
2093                 _cleanup_globfree_ glob_t pglob = {};
2094                 int count, n;
2095
2096                 fn = *i;
2097
2098                 if (fn[0] == '-') {
2099                         ignore = true;
2100                         fn ++;
2101                 }
2102
2103                 if (!path_is_absolute(fn)) {
2104                         if (ignore)
2105                                 continue;
2106
2107                         strv_free(r);
2108                         return -EINVAL;
2109                 }
2110
2111                 /* Filename supports globbing, take all matching files */
2112                 errno = 0;
2113                 if (glob(fn, 0, NULL, &pglob) != 0) {
2114                         if (ignore)
2115                                 continue;
2116
2117                         strv_free(r);
2118                         return errno ? -errno : -EINVAL;
2119                 }
2120                 count = pglob.gl_pathc;
2121                 if (count == 0) {
2122                         if (ignore)
2123                                 continue;
2124
2125                         strv_free(r);
2126                         return -EINVAL;
2127                 }
2128                 for (n = 0; n < count; n++) {
2129                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2130                         if (k < 0) {
2131                                 if (ignore)
2132                                         continue;
2133
2134                                 strv_free(r);
2135                                 return k;
2136                         }
2137                         /* Log invalid environment variables with filename */
2138                         if (p) {
2139                                 InvalidEnvInfo info = {
2140                                         .unit_id = unit_id,
2141                                         .path = pglob.gl_pathv[n]
2142                                 };
2143
2144                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2145                         }
2146
2147                         if (r == NULL)
2148                                 r = p;
2149                         else {
2150                                 char **m;
2151
2152                                 m = strv_env_merge(2, r, p);
2153                                 strv_free(r);
2154                                 strv_free(p);
2155                                 if (!m)
2156                                         return -ENOMEM;
2157
2158                                 r = m;
2159                         }
2160                 }
2161         }
2162
2163         *l = r;
2164
2165         return 0;
2166 }
2167
2168 static bool tty_may_match_dev_console(const char *tty) {
2169         _cleanup_free_ char *active = NULL;
2170        char *console;
2171
2172         if (startswith(tty, "/dev/"))
2173                 tty += 5;
2174
2175         /* trivial identity? */
2176         if (streq(tty, "console"))
2177                 return true;
2178
2179         console = resolve_dev_console(&active);
2180         /* if we could not resolve, assume it may */
2181         if (!console)
2182                 return true;
2183
2184         /* "tty0" means the active VC, so it may be the same sometimes */
2185         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2186 }
2187
2188 bool exec_context_may_touch_console(ExecContext *ec) {
2189         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2190                 is_terminal_input(ec->std_input) ||
2191                 is_terminal_output(ec->std_output) ||
2192                 is_terminal_output(ec->std_error)) &&
2193                tty_may_match_dev_console(tty_path(ec));
2194 }
2195
2196 static void strv_fprintf(FILE *f, char **l) {
2197         char **g;
2198
2199         assert(f);
2200
2201         STRV_FOREACH(g, l)
2202                 fprintf(f, " %s", *g);
2203 }
2204
2205 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2206         char **e;
2207         unsigned i;
2208
2209         assert(c);
2210         assert(f);
2211
2212         prefix = strempty(prefix);
2213
2214         fprintf(f,
2215                 "%sUMask: %04o\n"
2216                 "%sWorkingDirectory: %s\n"
2217                 "%sRootDirectory: %s\n"
2218                 "%sNonBlocking: %s\n"
2219                 "%sPrivateTmp: %s\n"
2220                 "%sPrivateNetwork: %s\n"
2221                 "%sPrivateDevices: %s\n"
2222                 "%sProtectHome: %s\n"
2223                 "%sProtectSystem: %s\n"
2224                 "%sIgnoreSIGPIPE: %s\n",
2225                 prefix, c->umask,
2226                 prefix, c->working_directory ? c->working_directory : "/",
2227                 prefix, c->root_directory ? c->root_directory : "/",
2228                 prefix, yes_no(c->non_blocking),
2229                 prefix, yes_no(c->private_tmp),
2230                 prefix, yes_no(c->private_network),
2231                 prefix, yes_no(c->private_devices),
2232                 prefix, protect_home_to_string(c->protect_home),
2233                 prefix, protect_system_to_string(c->protect_system),
2234                 prefix, yes_no(c->ignore_sigpipe));
2235
2236         STRV_FOREACH(e, c->environment)
2237                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2238
2239         STRV_FOREACH(e, c->environment_files)
2240                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2241
2242         if (c->nice_set)
2243                 fprintf(f,
2244                         "%sNice: %i\n",
2245                         prefix, c->nice);
2246
2247         if (c->oom_score_adjust_set)
2248                 fprintf(f,
2249                         "%sOOMScoreAdjust: %i\n",
2250                         prefix, c->oom_score_adjust);
2251
2252         for (i = 0; i < RLIM_NLIMITS; i++)
2253                 if (c->rlimit[i])
2254                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2255                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2256
2257         if (c->ioprio_set) {
2258                 _cleanup_free_ char *class_str = NULL;
2259
2260                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2261                 fprintf(f,
2262                         "%sIOSchedulingClass: %s\n"
2263                         "%sIOPriority: %i\n",
2264                         prefix, strna(class_str),
2265                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2266         }
2267
2268         if (c->cpu_sched_set) {
2269                 _cleanup_free_ char *policy_str = NULL;
2270
2271                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2272                 fprintf(f,
2273                         "%sCPUSchedulingPolicy: %s\n"
2274                         "%sCPUSchedulingPriority: %i\n"
2275                         "%sCPUSchedulingResetOnFork: %s\n",
2276                         prefix, strna(policy_str),
2277                         prefix, c->cpu_sched_priority,
2278                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2279         }
2280
2281         if (c->cpuset) {
2282                 fprintf(f, "%sCPUAffinity:", prefix);
2283                 for (i = 0; i < c->cpuset_ncpus; i++)
2284                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2285                                 fprintf(f, " %u", i);
2286                 fputs("\n", f);
2287         }
2288
2289         if (c->timer_slack_nsec != NSEC_INFINITY)
2290                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2291
2292         fprintf(f,
2293                 "%sStandardInput: %s\n"
2294                 "%sStandardOutput: %s\n"
2295                 "%sStandardError: %s\n",
2296                 prefix, exec_input_to_string(c->std_input),
2297                 prefix, exec_output_to_string(c->std_output),
2298                 prefix, exec_output_to_string(c->std_error));
2299
2300         if (c->tty_path)
2301                 fprintf(f,
2302                         "%sTTYPath: %s\n"
2303                         "%sTTYReset: %s\n"
2304                         "%sTTYVHangup: %s\n"
2305                         "%sTTYVTDisallocate: %s\n",
2306                         prefix, c->tty_path,
2307                         prefix, yes_no(c->tty_reset),
2308                         prefix, yes_no(c->tty_vhangup),
2309                         prefix, yes_no(c->tty_vt_disallocate));
2310
2311         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2312             c->std_output == EXEC_OUTPUT_KMSG ||
2313             c->std_output == EXEC_OUTPUT_JOURNAL ||
2314             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2317             c->std_error == EXEC_OUTPUT_SYSLOG ||
2318             c->std_error == EXEC_OUTPUT_KMSG ||
2319             c->std_error == EXEC_OUTPUT_JOURNAL ||
2320             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2321             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2322             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2323
2324                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2325
2326                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2327                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2328
2329                 fprintf(f,
2330                         "%sSyslogFacility: %s\n"
2331                         "%sSyslogLevel: %s\n",
2332                         prefix, strna(fac_str),
2333                         prefix, strna(lvl_str));
2334         }
2335
2336         if (c->capabilities) {
2337                 _cleanup_cap_free_charp_ char *t;
2338
2339                 t = cap_to_text(c->capabilities, NULL);
2340                 if (t)
2341                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2342         }
2343
2344         if (c->secure_bits)
2345                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2346                         prefix,
2347                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2348                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2349                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2350                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2351                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2352                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2353
2354         if (c->capability_bounding_set_drop) {
2355                 unsigned long l;
2356                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2357
2358                 for (l = 0; l <= cap_last_cap(); l++)
2359                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2360                                 fprintf(f, " %s", strna(capability_to_name(l)));
2361
2362                 fputs("\n", f);
2363         }
2364
2365         if (c->user)
2366                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2367         if (c->group)
2368                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2369
2370         if (strv_length(c->supplementary_groups) > 0) {
2371                 fprintf(f, "%sSupplementaryGroups:", prefix);
2372                 strv_fprintf(f, c->supplementary_groups);
2373                 fputs("\n", f);
2374         }
2375
2376         if (c->pam_name)
2377                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2378
2379         if (strv_length(c->read_write_dirs) > 0) {
2380                 fprintf(f, "%sReadWriteDirs:", prefix);
2381                 strv_fprintf(f, c->read_write_dirs);
2382                 fputs("\n", f);
2383         }
2384
2385         if (strv_length(c->read_only_dirs) > 0) {
2386                 fprintf(f, "%sReadOnlyDirs:", prefix);
2387                 strv_fprintf(f, c->read_only_dirs);
2388                 fputs("\n", f);
2389         }
2390
2391         if (strv_length(c->inaccessible_dirs) > 0) {
2392                 fprintf(f, "%sInaccessibleDirs:", prefix);
2393                 strv_fprintf(f, c->inaccessible_dirs);
2394                 fputs("\n", f);
2395         }
2396
2397         if (c->utmp_id)
2398                 fprintf(f,
2399                         "%sUtmpIdentifier: %s\n",
2400                         prefix, c->utmp_id);
2401
2402         if (c->selinux_context)
2403                 fprintf(f,
2404                         "%sSELinuxContext: %s%s\n",
2405                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2406
2407         if (c->personality != 0xffffffffUL)
2408                 fprintf(f,
2409                         "%sPersonality: %s\n",
2410                         prefix, strna(personality_to_string(c->personality)));
2411
2412         if (c->syscall_filter) {
2413 #ifdef HAVE_SECCOMP
2414                 Iterator j;
2415                 void *id;
2416                 bool first = true;
2417 #endif
2418
2419                 fprintf(f,
2420                         "%sSystemCallFilter: ",
2421                         prefix);
2422
2423                 if (!c->syscall_whitelist)
2424                         fputc('~', f);
2425
2426 #ifdef HAVE_SECCOMP
2427                 SET_FOREACH(id, c->syscall_filter, j) {
2428                         _cleanup_free_ char *name = NULL;
2429
2430                         if (first)
2431                                 first = false;
2432                         else
2433                                 fputc(' ', f);
2434
2435                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2436                         fputs(strna(name), f);
2437                 }
2438 #endif
2439
2440                 fputc('\n', f);
2441         }
2442
2443         if (c->syscall_archs) {
2444 #ifdef HAVE_SECCOMP
2445                 Iterator j;
2446                 void *id;
2447 #endif
2448
2449                 fprintf(f,
2450                         "%sSystemCallArchitectures:",
2451                         prefix);
2452
2453 #ifdef HAVE_SECCOMP
2454                 SET_FOREACH(id, c->syscall_archs, j)
2455                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2456 #endif
2457                 fputc('\n', f);
2458         }
2459
2460         if (c->syscall_errno != 0)
2461                 fprintf(f,
2462                         "%sSystemCallErrorNumber: %s\n",
2463                         prefix, strna(errno_to_name(c->syscall_errno)));
2464
2465         if (c->apparmor_profile)
2466                 fprintf(f,
2467                         "%sAppArmorProfile: %s%s\n",
2468                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2469 }
2470
2471 bool exec_context_maintains_privileges(ExecContext *c) {
2472         assert(c);
2473
2474         /* Returns true if the process forked off would run run under
2475          * an unchanged UID or as root. */
2476
2477         if (!c->user)
2478                 return true;
2479
2480         if (streq(c->user, "root") || streq(c->user, "0"))
2481                 return true;
2482
2483         return false;
2484 }
2485
2486 void exec_status_start(ExecStatus *s, pid_t pid) {
2487         assert(s);
2488
2489         zero(*s);
2490         s->pid = pid;
2491         dual_timestamp_get(&s->start_timestamp);
2492 }
2493
2494 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2495         assert(s);
2496
2497         if (s->pid && s->pid != pid)
2498                 zero(*s);
2499
2500         s->pid = pid;
2501         dual_timestamp_get(&s->exit_timestamp);
2502
2503         s->code = code;
2504         s->status = status;
2505
2506         if (context) {
2507                 if (context->utmp_id)
2508                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2509
2510                 exec_context_tty_reset(context);
2511         }
2512 }
2513
2514 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2515         char buf[FORMAT_TIMESTAMP_MAX];
2516
2517         assert(s);
2518         assert(f);
2519
2520         if (s->pid <= 0)
2521                 return;
2522
2523         prefix = strempty(prefix);
2524
2525         fprintf(f,
2526                 "%sPID: "PID_FMT"\n",
2527                 prefix, s->pid);
2528
2529         if (s->start_timestamp.realtime > 0)
2530                 fprintf(f,
2531                         "%sStart Timestamp: %s\n",
2532                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2533
2534         if (s->exit_timestamp.realtime > 0)
2535                 fprintf(f,
2536                         "%sExit Timestamp: %s\n"
2537                         "%sExit Code: %s\n"
2538                         "%sExit Status: %i\n",
2539                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2540                         prefix, sigchld_code_to_string(s->code),
2541                         prefix, s->status);
2542 }
2543
2544 char *exec_command_line(char **argv) {
2545         size_t k;
2546         char *n, *p, **a;
2547         bool first = true;
2548
2549         assert(argv);
2550
2551         k = 1;
2552         STRV_FOREACH(a, argv)
2553                 k += strlen(*a)+3;
2554
2555         if (!(n = new(char, k)))
2556                 return NULL;
2557
2558         p = n;
2559         STRV_FOREACH(a, argv) {
2560
2561                 if (!first)
2562                         *(p++) = ' ';
2563                 else
2564                         first = false;
2565
2566                 if (strpbrk(*a, WHITESPACE)) {
2567                         *(p++) = '\'';
2568                         p = stpcpy(p, *a);
2569                         *(p++) = '\'';
2570                 } else
2571                         p = stpcpy(p, *a);
2572
2573         }
2574
2575         *p = 0;
2576
2577         /* FIXME: this doesn't really handle arguments that have
2578          * spaces and ticks in them */
2579
2580         return n;
2581 }
2582
2583 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2584         _cleanup_free_ char *cmd = NULL;
2585         const char *prefix2;
2586
2587         assert(c);
2588         assert(f);
2589
2590         prefix = strempty(prefix);
2591         prefix2 = strjoina(prefix, "\t");
2592
2593         cmd = exec_command_line(c->argv);
2594         fprintf(f,
2595                 "%sCommand Line: %s\n",
2596                 prefix, cmd ? cmd : strerror(ENOMEM));
2597
2598         exec_status_dump(&c->exec_status, f, prefix2);
2599 }
2600
2601 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2602         assert(f);
2603
2604         prefix = strempty(prefix);
2605
2606         LIST_FOREACH(command, c, c)
2607                 exec_command_dump(c, f, prefix);
2608 }
2609
2610 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2611         ExecCommand *end;
2612
2613         assert(l);
2614         assert(e);
2615
2616         if (*l) {
2617                 /* It's kind of important, that we keep the order here */
2618                 LIST_FIND_TAIL(command, *l, end);
2619                 LIST_INSERT_AFTER(command, *l, end, e);
2620         } else
2621               *l = e;
2622 }
2623
2624 int exec_command_set(ExecCommand *c, const char *path, ...) {
2625         va_list ap;
2626         char **l, *p;
2627
2628         assert(c);
2629         assert(path);
2630
2631         va_start(ap, path);
2632         l = strv_new_ap(path, ap);
2633         va_end(ap);
2634
2635         if (!l)
2636                 return -ENOMEM;
2637
2638         p = strdup(path);
2639         if (!p) {
2640                 strv_free(l);
2641                 return -ENOMEM;
2642         }
2643
2644         free(c->path);
2645         c->path = p;
2646
2647         strv_free(c->argv);
2648         c->argv = l;
2649
2650         return 0;
2651 }
2652
2653 int exec_command_append(ExecCommand *c, const char *path, ...) {
2654         _cleanup_strv_free_ char **l = NULL;
2655         va_list ap;
2656         int r;
2657
2658         assert(c);
2659         assert(path);
2660
2661         va_start(ap, path);
2662         l = strv_new_ap(path, ap);
2663         va_end(ap);
2664
2665         if (!l)
2666                 return -ENOMEM;
2667
2668         r = strv_extend_strv(&c->argv, l);
2669         if (r < 0)
2670                 return r;
2671
2672         return 0;
2673 }
2674
2675
2676 static int exec_runtime_allocate(ExecRuntime **rt) {
2677
2678         if (*rt)
2679                 return 0;
2680
2681         *rt = new0(ExecRuntime, 1);
2682         if (!*rt)
2683                 return -ENOMEM;
2684
2685         (*rt)->n_ref = 1;
2686         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2687
2688         return 0;
2689 }
2690
2691 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2692         int r;
2693
2694         assert(rt);
2695         assert(c);
2696         assert(id);
2697
2698         if (*rt)
2699                 return 1;
2700
2701         if (!c->private_network && !c->private_tmp)
2702                 return 0;
2703
2704         r = exec_runtime_allocate(rt);
2705         if (r < 0)
2706                 return r;
2707
2708         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2709                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2710                         return -errno;
2711         }
2712
2713         if (c->private_tmp && !(*rt)->tmp_dir) {
2714                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2715                 if (r < 0)
2716                         return r;
2717         }
2718
2719         return 1;
2720 }
2721
2722 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2723         assert(r);
2724         assert(r->n_ref > 0);
2725
2726         r->n_ref++;
2727         return r;
2728 }
2729
2730 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2731
2732         if (!r)
2733                 return NULL;
2734
2735         assert(r->n_ref > 0);
2736
2737         r->n_ref--;
2738         if (r->n_ref <= 0) {
2739                 free(r->tmp_dir);
2740                 free(r->var_tmp_dir);
2741                 safe_close_pair(r->netns_storage_socket);
2742                 free(r);
2743         }
2744
2745         return NULL;
2746 }
2747
2748 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2749         assert(u);
2750         assert(f);
2751         assert(fds);
2752
2753         if (!rt)
2754                 return 0;
2755
2756         if (rt->tmp_dir)
2757                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2758
2759         if (rt->var_tmp_dir)
2760                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2761
2762         if (rt->netns_storage_socket[0] >= 0) {
2763                 int copy;
2764
2765                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2766                 if (copy < 0)
2767                         return copy;
2768
2769                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2770         }
2771
2772         if (rt->netns_storage_socket[1] >= 0) {
2773                 int copy;
2774
2775                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2776                 if (copy < 0)
2777                         return copy;
2778
2779                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2780         }
2781
2782         return 0;
2783 }
2784
2785 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2786         int r;
2787
2788         assert(rt);
2789         assert(key);
2790         assert(value);
2791
2792         if (streq(key, "tmp-dir")) {
2793                 char *copy;
2794
2795                 r = exec_runtime_allocate(rt);
2796                 if (r < 0)
2797                         return r;
2798
2799                 copy = strdup(value);
2800                 if (!copy)
2801                         return log_oom();
2802
2803                 free((*rt)->tmp_dir);
2804                 (*rt)->tmp_dir = copy;
2805
2806         } else if (streq(key, "var-tmp-dir")) {
2807                 char *copy;
2808
2809                 r = exec_runtime_allocate(rt);
2810                 if (r < 0)
2811                         return r;
2812
2813                 copy = strdup(value);
2814                 if (!copy)
2815                         return log_oom();
2816
2817                 free((*rt)->var_tmp_dir);
2818                 (*rt)->var_tmp_dir = copy;
2819
2820         } else if (streq(key, "netns-socket-0")) {
2821                 int fd;
2822
2823                 r = exec_runtime_allocate(rt);
2824                 if (r < 0)
2825                         return r;
2826
2827                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2828                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2829                 else {
2830                         safe_close((*rt)->netns_storage_socket[0]);
2831                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2832                 }
2833         } else if (streq(key, "netns-socket-1")) {
2834                 int fd;
2835
2836                 r = exec_runtime_allocate(rt);
2837                 if (r < 0)
2838                         return r;
2839
2840                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2841                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2842                 else {
2843                         safe_close((*rt)->netns_storage_socket[1]);
2844                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2845                 }
2846         } else
2847                 return 0;
2848
2849         return 1;
2850 }
2851
2852 static void *remove_tmpdir_thread(void *p) {
2853         _cleanup_free_ char *path = p;
2854
2855         rm_rf_dangerous(path, false, true, false);
2856         return NULL;
2857 }
2858
2859 void exec_runtime_destroy(ExecRuntime *rt) {
2860         int r;
2861
2862         if (!rt)
2863                 return;
2864
2865         /* If there are multiple users of this, let's leave the stuff around */
2866         if (rt->n_ref > 1)
2867                 return;
2868
2869         if (rt->tmp_dir) {
2870                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2871
2872                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2873                 if (r < 0) {
2874                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2875                         free(rt->tmp_dir);
2876                 }
2877
2878                 rt->tmp_dir = NULL;
2879         }
2880
2881         if (rt->var_tmp_dir) {
2882                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2883
2884                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2885                 if (r < 0) {
2886                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2887                         free(rt->var_tmp_dir);
2888                 }
2889
2890                 rt->var_tmp_dir = NULL;
2891         }
2892
2893         safe_close_pair(rt->netns_storage_socket);
2894 }
2895
2896 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2897         [EXEC_INPUT_NULL] = "null",
2898         [EXEC_INPUT_TTY] = "tty",
2899         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2900         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2901         [EXEC_INPUT_SOCKET] = "socket"
2902 };
2903
2904 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2905
2906 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2907         [EXEC_OUTPUT_INHERIT] = "inherit",
2908         [EXEC_OUTPUT_NULL] = "null",
2909         [EXEC_OUTPUT_TTY] = "tty",
2910         [EXEC_OUTPUT_SYSLOG] = "syslog",
2911         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2912         [EXEC_OUTPUT_KMSG] = "kmsg",
2913         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2914         [EXEC_OUTPUT_JOURNAL] = "journal",
2915         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2916         [EXEC_OUTPUT_SOCKET] = "socket"
2917 };
2918
2919 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);