chiark / gitweb /
874cdc7efee2730e12c09063920cc54b10c3ec3c
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Relocates the passed file descriptors so that fds[i] ends up as
 * descriptor number i+3, i.e. directly behind stdin/stdout/stderr.
 *
 * The array is modified in place. Returns 0 on success or a negative
 * errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, moved;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        /* F_DUPFD hands out the lowest free descriptor
                         * that is >= target */
                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The slot we wanted was still occupied, so we
                         * got a higher number. Remember the first such
                         * index and make another pass from there. */
                        if (moved != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Applies the requested O_NONBLOCK state to every passed descriptor and
 * clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child process. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_path)
178                 return context->tty_path;
179
180         return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184         assert(context);
185
186         if (context->tty_vhangup)
187                 terminal_vhangup(tty_path(context));
188
189         if (context->tty_reset)
190                 reset_terminal(tty_path(context));
191
192         if (context->tty_vt_disallocate && context->tty_path)
193                 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197         return
198                 o == EXEC_OUTPUT_TTY ||
199                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and
 * installs it as descriptor nfd.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already gave us the requested number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
221
222 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227         uid_t olduid = UID_INVALID;
228         gid_t oldgid = GID_INVALID;
229         int r;
230
231         if (gid != GID_INVALID) {
232                 oldgid = getgid();
233
234                 r = setegid(gid);
235                 if (r < 0)
236                         return -errno;
237         }
238
239         if (uid != UID_INVALID) {
240                 olduid = getuid();
241
242                 r = seteuid(uid);
243                 if (r < 0) {
244                         r = -errno;
245                         goto restore_gid;
246                 }
247         }
248
249         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
250         if (r < 0)
251                 r = -errno;
252
253         /* If we fail to restore the uid or gid, things will likely
254            fail later on. This should only happen if an LSM interferes. */
255
256         if (uid != UID_INVALID)
257                 (void) seteuid(olduid);
258
259  restore_gid:
260         if (gid != GID_INVALID)
261                 (void) setegid(oldgid);
262
263         return r;
264 }
265
266 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
267         int fd, r;
268
269         assert(context);
270         assert(output < _EXEC_OUTPUT_MAX);
271         assert(ident);
272         assert(nfd >= 0);
273
274         fd = socket(AF_UNIX, SOCK_STREAM, 0);
275         if (fd < 0)
276                 return -errno;
277
278         r = connect_journal_socket(fd, uid, gid);
279         if (r < 0)
280                 return r;
281
282         if (shutdown(fd, SHUT_RD) < 0) {
283                 safe_close(fd);
284                 return -errno;
285         }
286
287         fd_inc_sndbuf(fd, SNDBUF_SIZE);
288
289         dprintf(fd,
290                 "%s\n"
291                 "%s\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n"
295                 "%i\n"
296                 "%i\n",
297                 context->syslog_identifier ? context->syslog_identifier : ident,
298                 unit_id,
299                 context->syslog_priority,
300                 !!context->syslog_level_prefix,
301                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
302                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
303                 is_terminal_output(output));
304
305         if (fd != nfd) {
306                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
307                 safe_close(fd);
308         } else
309                 r = nfd;
310
311         return r;
312 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * installs it as descriptor nfd.
 *
 * Returns nfd on success; on failure the negative value from
 * open_terminal() or a negative errno-style value from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already got the requested descriptor number */
        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return result;
}
330
331 static bool is_terminal_input(ExecInput i) {
332         return
333                 i == EXEC_INPUT_TTY ||
334                 i == EXEC_INPUT_TTY_FORCE ||
335                 i == EXEC_INPUT_TTY_FAIL;
336 }
337
338 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
339
340         if (is_terminal_input(std_input) && !apply_tty_stdin)
341                 return EXEC_INPUT_NULL;
342
343         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
344                 return EXEC_INPUT_NULL;
345
346         return std_input;
347 }
348
349 static int fixup_output(ExecOutput std_output, int socket_fd) {
350
351         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
352                 return EXEC_OUTPUT_INHERIT;
353
354         return std_output;
355 }
356
357 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
358         ExecInput i;
359
360         assert(context);
361
362         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
363
364         switch (i) {
365
366         case EXEC_INPUT_NULL:
367                 return open_null_as(O_RDONLY, STDIN_FILENO);
368
369         case EXEC_INPUT_TTY:
370         case EXEC_INPUT_TTY_FORCE:
371         case EXEC_INPUT_TTY_FAIL: {
372                 int fd, r;
373
374                 fd = acquire_terminal(tty_path(context),
375                                       i == EXEC_INPUT_TTY_FAIL,
376                                       i == EXEC_INPUT_TTY_FORCE,
377                                       false,
378                                       USEC_INFINITY);
379                 if (fd < 0)
380                         return fd;
381
382                 if (fd != STDIN_FILENO) {
383                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
384                         safe_close(fd);
385                 } else
386                         r = STDIN_FILENO;
387
388                 return r;
389         }
390
391         case EXEC_INPUT_SOCKET:
392                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
393
394         default:
395                 assert_not_reached("Unknown input type");
396         }
397 }
398
399 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
400         ExecOutput o;
401         ExecInput i;
402         int r;
403
404         assert(context);
405         assert(ident);
406
407         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408         o = fixup_output(context->std_output, socket_fd);
409
410         if (fileno == STDERR_FILENO) {
411                 ExecOutput e;
412                 e = fixup_output(context->std_error, socket_fd);
413
414                 /* This expects the input and output are already set up */
415
416                 /* Don't change the stderr file descriptor if we inherit all
417                  * the way and are not on a tty */
418                 if (e == EXEC_OUTPUT_INHERIT &&
419                     o == EXEC_OUTPUT_INHERIT &&
420                     i == EXEC_INPUT_NULL &&
421                     !is_terminal_input(context->std_input) &&
422                     getppid () != 1)
423                         return fileno;
424
425                 /* Duplicate from stdout if possible */
426                 if (e == o || e == EXEC_OUTPUT_INHERIT)
427                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
428
429                 o = e;
430
431         } else if (o == EXEC_OUTPUT_INHERIT) {
432                 /* If input got downgraded, inherit the original value */
433                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
435
436                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437                 if (i != EXEC_INPUT_NULL)
438                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
439
440                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441                 if (getppid() != 1)
442                         return fileno;
443
444                 /* We need to open /dev/null here anew, to get the right access mode. */
445                 return open_null_as(O_WRONLY, fileno);
446         }
447
448         switch (o) {
449
450         case EXEC_OUTPUT_NULL:
451                 return open_null_as(O_WRONLY, fileno);
452
453         case EXEC_OUTPUT_TTY:
454                 if (is_terminal_input(i))
455                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
456
457                 /* We don't reset the terminal if this is just about output */
458                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
459
460         case EXEC_OUTPUT_SYSLOG:
461         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462         case EXEC_OUTPUT_KMSG:
463         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464         case EXEC_OUTPUT_JOURNAL:
465         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
467                 if (r < 0) {
468                         log_unit_struct(unit_id,
469                                         LOG_ERR,
470                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
471                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
472                                                     unit_id, strerror(-r)),
473                                         LOG_ERRNO(-r),
474                                         NULL);
475                         r = open_null_as(O_WRONLY, fileno);
476                 }
477                 return r;
478
479         case EXEC_OUTPUT_SOCKET:
480                 assert(socket_fd >= 0);
481                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482
483         default:
484                 assert_not_reached("Unknown error type");
485         }
486 }
487
488 static int chown_terminal(int fd, uid_t uid) {
489         struct stat st;
490
491         assert(fd >= 0);
492
493         /* This might fail. What matters are the results. */
494         (void) fchown(fd, uid, -1);
495         (void) fchmod(fd, TTY_MODE);
496
497         if (fstat(fd, &st) < 0)
498                 return -errno;
499
500         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
501                 return -EPERM;
502
503         return 0;
504 }
505
506 static int setup_confirm_stdio(int *_saved_stdin,
507                                int *_saved_stdout) {
508         int fd = -1, saved_stdin, saved_stdout = -1, r;
509
510         assert(_saved_stdin);
511         assert(_saved_stdout);
512
513         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
514         if (saved_stdin < 0)
515                 return -errno;
516
517         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518         if (saved_stdout < 0) {
519                 r = errno;
520                 goto fail;
521         }
522
523         fd = acquire_terminal(
524                         "/dev/console",
525                         false,
526                         false,
527                         false,
528                         DEFAULT_CONFIRM_USEC);
529         if (fd < 0) {
530                 r = fd;
531                 goto fail;
532         }
533
534         r = chown_terminal(fd, getuid());
535         if (r < 0)
536                 goto fail;
537
538         if (dup2(fd, STDIN_FILENO) < 0) {
539                 r = -errno;
540                 goto fail;
541         }
542
543         if (dup2(fd, STDOUT_FILENO) < 0) {
544                 r = -errno;
545                 goto fail;
546         }
547
548         if (fd >= 2)
549                 safe_close(fd);
550
551         *_saved_stdin = saved_stdin;
552         *_saved_stdout = saved_stdout;
553
554         return 0;
555
556 fail:
557         safe_close(saved_stdout);
558         safe_close(saved_stdin);
559         safe_close(fd);
560
561         return r;
562 }
563
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565         _cleanup_close_ int fd = -1;
566         va_list ap;
567
568         assert(format);
569
570         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
571         if (fd < 0)
572                 return fd;
573
574         va_start(ap, format);
575         vdprintf(fd, format, ap);
576         va_end(ap);
577
578         return 0;
579 }
580
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back onto stdin/stdout and closes the saved copies.
 *
 * Returns 0 on success. If a dup2() fails, the negative errno-style
 * value of the last failing one is returned; both saved descriptors
 * are closed in any case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
604
605 static int ask_for_confirmation(char *response, char **argv) {
606         int saved_stdout = -1, saved_stdin = -1, r;
607         _cleanup_free_ char *line = NULL;
608
609         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
610         if (r < 0)
611                 return r;
612
613         line = exec_command_line(argv);
614         if (!line)
615                 return -ENOMEM;
616
617         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
618
619         restore_confirm_stdio(&saved_stdin, &saved_stdout);
620
621         return r;
622 }
623
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625         bool keep_groups = false;
626         int r;
627
628         assert(context);
629
630         /* Lookup and set GID and supplementary group list. Here too
631          * we avoid NSS lookups for gid=0. */
632
633         if (context->group || username) {
634
635                 if (context->group) {
636                         const char *g = context->group;
637
638                         if ((r = get_group_creds(&g, &gid)) < 0)
639                                 return r;
640                 }
641
642                 /* First step, initialize groups from /etc/groups */
643                 if (username && gid != 0) {
644                         if (initgroups(username, gid) < 0)
645                                 return -errno;
646
647                         keep_groups = true;
648                 }
649
650                 /* Second step, set our gids */
651                 if (setresgid(gid, gid, gid) < 0)
652                         return -errno;
653         }
654
655         if (context->supplementary_groups) {
656                 int ngroups_max, k;
657                 gid_t *gids;
658                 char **i;
659
660                 /* Final step, initialize any manually set supplementary groups */
661                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
662
663                 if (!(gids = new(gid_t, ngroups_max)))
664                         return -ENOMEM;
665
666                 if (keep_groups) {
667                         if ((k = getgroups(ngroups_max, gids)) < 0) {
668                                 free(gids);
669                                 return -errno;
670                         }
671                 } else
672                         k = 0;
673
674                 STRV_FOREACH(i, context->supplementary_groups) {
675                         const char *g;
676
677                         if (k >= ngroups_max) {
678                                 free(gids);
679                                 return -E2BIG;
680                         }
681
682                         g = *i;
683                         r = get_group_creds(&g, gids+k);
684                         if (r < 0) {
685                                 free(gids);
686                                 return r;
687                         }
688
689                         k++;
690                 }
691
692                 if (setgroups(k, gids) < 0) {
693                         free(gids);
694                         return -errno;
695                 }
696
697                 free(gids);
698         }
699
700         return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704         assert(context);
705
706         /* Sets (but doesn't lookup) the uid and make sure we keep the
707          * capabilities while doing so. */
708
709         if (context->capabilities) {
710                 _cleanup_cap_free_ cap_t d = NULL;
711                 static const cap_value_t bits[] = {
712                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
713                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
714                 };
715
716                 /* First step: If we need to keep capabilities but
717                  * drop privileges we need to make sure we keep our
718                  * caps, while we drop privileges. */
719                 if (uid != 0) {
720                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722                         if (prctl(PR_GET_SECUREBITS) != sb)
723                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724                                         return -errno;
725                 }
726
727                 /* Second step: set the capabilities. This will reduce
728                  * the capabilities to the minimum we need. */
729
730                 d = cap_dup(context->capabilities);
731                 if (!d)
732                         return -errno;
733
734                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736                         return -errno;
737
738                 if (cap_set_proc(d) < 0)
739                         return -errno;
740         }
741
742         /* Third step: actually set the uids */
743         if (setresuid(uid, uid, uid) < 0)
744                 return -errno;
745
746         /* At this point we should have all necessary capabilities but
747            are otherwise a normal user. However, the caps might got
748            corrupted due to the setresuid() so we need clean them up
749            later. This is done outside of this call. */
750
751         return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757                 int num_msg,
758                 const struct pam_message **msg,
759                 struct pam_response **resp,
760                 void *appdata_ptr) {
761
762         /* We don't support conversations */
763
764         return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768                 const char *name,
769                 const char *user,
770                 uid_t uid,
771                 const char *tty,
772                 char ***pam_env,
773                 int fds[], unsigned n_fds) {
774
775         static const struct pam_conv conv = {
776                 .conv = null_conv,
777                 .appdata_ptr = NULL
778         };
779
780         pam_handle_t *handle = NULL;
781         sigset_t ss, old_ss;
782         int pam_code = PAM_SUCCESS;
783         int err;
784         char **e = NULL;
785         bool close_session = false;
786         pid_t pam_pid = 0, parent_pid;
787         int flags = 0;
788
789         assert(name);
790         assert(user);
791         assert(pam_env);
792
793         /* We set up PAM in the parent process, then fork. The child
794          * will then stay around until killed via PR_GET_PDEATHSIG or
795          * systemd via the cgroup logic. It will then remove the PAM
796          * session again. The parent process will exec() the actual
797          * daemon. We do things this way to ensure that the main PID
798          * of the daemon is the one we initially fork()ed. */
799
800         if (log_get_max_level() < LOG_DEBUG)
801                 flags |= PAM_SILENT;
802
803         pam_code = pam_start(name, user, &conv, &handle);
804         if (pam_code != PAM_SUCCESS) {
805                 handle = NULL;
806                 goto fail;
807         }
808
809         if (tty) {
810                 pam_code = pam_set_item(handle, PAM_TTY, tty);
811                 if (pam_code != PAM_SUCCESS)
812                         goto fail;
813         }
814
815         pam_code = pam_acct_mgmt(handle, flags);
816         if (pam_code != PAM_SUCCESS)
817                 goto fail;
818
819         pam_code = pam_open_session(handle, flags);
820         if (pam_code != PAM_SUCCESS)
821                 goto fail;
822
823         close_session = true;
824
825         e = pam_getenvlist(handle);
826         if (!e) {
827                 pam_code = PAM_BUF_ERR;
828                 goto fail;
829         }
830
831         /* Block SIGTERM, so that we know that it won't get lost in
832          * the child */
833         if (sigemptyset(&ss) < 0 ||
834             sigaddset(&ss, SIGTERM) < 0 ||
835             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
836                 goto fail;
837
838         parent_pid = getpid();
839
840         pam_pid = fork();
841         if (pam_pid < 0)
842                 goto fail;
843
844         if (pam_pid == 0) {
845                 int sig;
846                 int r = EXIT_PAM;
847
848                 /* The child's job is to reset the PAM session on
849                  * termination */
850
851                 /* This string must fit in 10 chars (i.e. the length
852                  * of "/sbin/init"), to look pretty in /bin/ps */
853                 rename_process("(sd-pam)");
854
855                 /* Make sure we don't keep open the passed fds in this
856                 child. We assume that otherwise only those fds are
857                 open here that have been opened by PAM. */
858                 close_many(fds, n_fds);
859
860                 /* Drop privileges - we don't need any to pam_close_session
861                  * and this will make PR_SET_PDEATHSIG work in most cases.
862                  * If this fails, ignore the error - but expect sd-pam threads
863                  * to fail to exit normally */
864                 if (setresuid(uid, uid, uid) < 0)
865                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
866
867                 /* Wait until our parent died. This will only work if
868                  * the above setresuid() succeeds, otherwise the kernel
869                  * will not allow unprivileged parents kill their privileged
870                  * children this way. We rely on the control groups kill logic
871                  * to do the rest for us. */
872                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873                         goto child_finish;
874
875                 /* Check if our parent process might already have
876                  * died? */
877                 if (getppid() == parent_pid) {
878                         for (;;) {
879                                 if (sigwait(&ss, &sig) < 0) {
880                                         if (errno == EINTR)
881                                                 continue;
882
883                                         goto child_finish;
884                                 }
885
886                                 assert(sig == SIGTERM);
887                                 break;
888                         }
889                 }
890
891                 /* If our parent died we'll end the session */
892                 if (getppid() != parent_pid) {
893                         pam_code = pam_close_session(handle, flags);
894                         if (pam_code != PAM_SUCCESS)
895                                 goto child_finish;
896                 }
897
898                 r = 0;
899
900         child_finish:
901                 pam_end(handle, pam_code | flags);
902                 _exit(r);
903         }
904
905         /* If the child was forked off successfully it will do all the
906          * cleanups, so forget about the handle here. */
907         handle = NULL;
908
909         /* Unblock SIGTERM again in the parent */
910         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
911                 goto fail;
912
913         /* We close the log explicitly here, since the PAM modules
914          * might have opened it, but we don't want this fd around. */
915         closelog();
916
917         *pam_env = e;
918         e = NULL;
919
920         return 0;
921
922 fail:
923         if (pam_code != PAM_SUCCESS) {
924                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
925                 err = -EPERM;  /* PAM errors do not map to errno */
926         } else {
927                 log_error_errno(errno, "PAM failed: %m");
928                 err = -errno;
929         }
930
931         if (handle) {
932                 if (close_session)
933                         pam_code = pam_close_session(handle, flags);
934
935                 pam_end(handle, pam_code | flags);
936         }
937
938         strv_free(e);
939
940         closelog();
941
942         if (pam_pid > 1) {
943                 kill(pam_pid, SIGTERM);
944                 kill(pam_pid, SIGCONT);
945         }
946
947         return err;
948 }
949 #endif
950
/* Renames the current process after the file name part of path,
 * wrapped in parentheses. Only the last 8 characters of the name are
 * used, so that the result fits in 10 characters (the length of
 * "/sbin/init") and looks pretty in /bin/ps. An empty name yields
 * "(...)". */
static void rename_process_from_path(const char *path) {
        char label[11], *out = label;
        const char *tail;
        size_t len;

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Keep the end of the name: it is usually the more
                 * interesting part, as the beginning might just be
                 * "systemd-" */
                tail += len - 8;
                len = 8;
        }

        *out++ = '(';
        memcpy(out, tail, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(label);
}
981
982 #ifdef HAVE_SECCOMP
983
984 static int apply_seccomp(const ExecContext *c) {
985         uint32_t negative_action, action;
986         scmp_filter_ctx *seccomp;
987         Iterator i;
988         void *id;
989         int r;
990
991         assert(c);
992
993         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
994
995         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
996         if (!seccomp)
997                 return -ENOMEM;
998
999         if (c->syscall_archs) {
1000
1001                 SET_FOREACH(id, c->syscall_archs, i) {
1002                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1003                         if (r == -EEXIST)
1004                                 continue;
1005                         if (r < 0)
1006                                 goto finish;
1007                 }
1008
1009         } else {
1010                 r = seccomp_add_secondary_archs(seccomp);
1011                 if (r < 0)
1012                         goto finish;
1013         }
1014
1015         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1016         SET_FOREACH(id, c->syscall_filter, i) {
1017                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1018                 if (r < 0)
1019                         goto finish;
1020         }
1021
1022         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1023         if (r < 0)
1024                 goto finish;
1025
1026         r = seccomp_load(seccomp);
1027
1028 finish:
1029         seccomp_release(seccomp);
1030         return r;
1031 }
1032
1033 static int apply_address_families(const ExecContext *c) {
1034         scmp_filter_ctx *seccomp;
1035         Iterator i;
1036         int r;
1037
1038         assert(c);
1039
1040         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1041         if (!seccomp)
1042                 return -ENOMEM;
1043
1044         r = seccomp_add_secondary_archs(seccomp);
1045         if (r < 0)
1046                 goto finish;
1047
1048         if (c->address_families_whitelist) {
1049                 int af, first = 0, last = 0;
1050                 void *afp;
1051
1052                 /* If this is a whitelist, we first block the address
1053                  * families that are out of range and then everything
1054                  * that is not in the set. First, we find the lowest
1055                  * and highest address family in the set. */
1056
1057                 SET_FOREACH(afp, c->address_families, i) {
1058                         af = PTR_TO_INT(afp);
1059
1060                         if (af <= 0 || af >= af_max())
1061                                 continue;
1062
1063                         if (first == 0 || af < first)
1064                                 first = af;
1065
1066                         if (last == 0 || af > last)
1067                                 last = af;
1068                 }
1069
1070                 assert((first == 0) == (last == 0));
1071
1072                 if (first == 0) {
1073
1074                         /* No entries in the valid range, block everything */
1075                         r = seccomp_rule_add(
1076                                         seccomp,
1077                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1078                                         SCMP_SYS(socket),
1079                                         0);
1080                         if (r < 0)
1081                                 goto finish;
1082
1083                 } else {
1084
1085                         /* Block everything below the first entry */
1086                         r = seccomp_rule_add(
1087                                         seccomp,
1088                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1089                                         SCMP_SYS(socket),
1090                                         1,
1091                                         SCMP_A0(SCMP_CMP_LT, first));
1092                         if (r < 0)
1093                                 goto finish;
1094
1095                         /* Block everything above the last entry */
1096                         r = seccomp_rule_add(
1097                                         seccomp,
1098                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1099                                         SCMP_SYS(socket),
1100                                         1,
1101                                         SCMP_A0(SCMP_CMP_GT, last));
1102                         if (r < 0)
1103                                 goto finish;
1104
1105                         /* Block everything between the first and last
1106                          * entry */
1107                         for (af = 1; af < af_max(); af++) {
1108
1109                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1110                                         continue;
1111
1112                                 r = seccomp_rule_add(
1113                                                 seccomp,
1114                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1115                                                 SCMP_SYS(socket),
1116                                                 1,
1117                                                 SCMP_A0(SCMP_CMP_EQ, af));
1118                                 if (r < 0)
1119                                         goto finish;
1120                         }
1121                 }
1122
1123         } else {
1124                 void *af;
1125
1126                 /* If this is a blacklist, then generate one rule for
1127                  * each address family that are then combined in OR
1128                  * checks. */
1129
1130                 SET_FOREACH(af, c->address_families, i) {
1131
1132                         r = seccomp_rule_add(
1133                                         seccomp,
1134                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1135                                         SCMP_SYS(socket),
1136                                         1,
1137                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1138                         if (r < 0)
1139                                 goto finish;
1140                 }
1141         }
1142
1143         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1144         if (r < 0)
1145                 goto finish;
1146
1147         r = seccomp_load(seccomp);
1148
1149 finish:
1150         seccomp_release(seccomp);
1151         return r;
1152 }
1153
1154 #endif
1155
1156 static void do_idle_pipe_dance(int idle_pipe[4]) {
1157         assert(idle_pipe);
1158
1159
1160         safe_close(idle_pipe[1]);
1161         safe_close(idle_pipe[2]);
1162
1163         if (idle_pipe[0] >= 0) {
1164                 int r;
1165
1166                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1167
1168                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1169                         /* Signal systemd that we are bored and want to continue. */
1170                         write(idle_pipe[3], "x", 1);
1171
1172                         /* Wait for systemd to react to the signal above. */
1173                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1174                 }
1175
1176                 safe_close(idle_pipe[0]);
1177
1178         }
1179
1180         safe_close(idle_pipe[3]);
1181 }
1182
1183 static int build_environment(
1184                 const ExecContext *c,
1185                 unsigned n_fds,
1186                 usec_t watchdog_usec,
1187                 const char *home,
1188                 const char *username,
1189                 const char *shell,
1190                 char ***ret) {
1191
1192         _cleanup_strv_free_ char **our_env = NULL;
1193         unsigned n_env = 0;
1194         char *x;
1195
1196         assert(c);
1197         assert(ret);
1198
1199         our_env = new0(char*, 10);
1200         if (!our_env)
1201                 return -ENOMEM;
1202
1203         if (n_fds > 0) {
1204                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1205                         return -ENOMEM;
1206                 our_env[n_env++] = x;
1207
1208                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1209                         return -ENOMEM;
1210                 our_env[n_env++] = x;
1211         }
1212
1213         if (watchdog_usec > 0) {
1214                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217
1218                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1219                         return -ENOMEM;
1220                 our_env[n_env++] = x;
1221         }
1222
1223         if (home) {
1224                 x = strappend("HOME=", home);
1225                 if (!x)
1226                         return -ENOMEM;
1227                 our_env[n_env++] = x;
1228         }
1229
1230         if (username) {
1231                 x = strappend("LOGNAME=", username);
1232                 if (!x)
1233                         return -ENOMEM;
1234                 our_env[n_env++] = x;
1235
1236                 x = strappend("USER=", username);
1237                 if (!x)
1238                         return -ENOMEM;
1239                 our_env[n_env++] = x;
1240         }
1241
1242         if (shell) {
1243                 x = strappend("SHELL=", shell);
1244                 if (!x)
1245                         return -ENOMEM;
1246                 our_env[n_env++] = x;
1247         }
1248
1249         if (is_terminal_input(c->std_input) ||
1250             c->std_output == EXEC_OUTPUT_TTY ||
1251             c->std_error == EXEC_OUTPUT_TTY ||
1252             c->tty_path) {
1253
1254                 x = strdup(default_term_for_tty(tty_path(c)));
1255                 if (!x)
1256                         return -ENOMEM;
1257                 our_env[n_env++] = x;
1258         }
1259
1260         our_env[n_env++] = NULL;
1261         assert(n_env <= 10);
1262
1263         *ret = our_env;
1264         our_env = NULL;
1265
1266         return 0;
1267 }
1268
1269 static int exec_child(
1270                 ExecCommand *command,
1271                 const ExecContext *context,
1272                 const ExecParameters *params,
1273                 ExecRuntime *runtime,
1274                 char **argv,
1275                 int socket_fd,
1276                 int *fds, unsigned n_fds,
1277                 char **files_env,
1278                 int *exit_status) {
1279
1280         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1281         _cleanup_free_ char *mac_selinux_context_net = NULL;
1282         const char *username = NULL, *home = NULL, *shell = NULL;
1283         unsigned n_dont_close = 0;
1284         int dont_close[n_fds + 4];
1285         uid_t uid = UID_INVALID;
1286         gid_t gid = GID_INVALID;
1287         int i, r;
1288
1289         assert(command);
1290         assert(context);
1291         assert(params);
1292         assert(exit_status);
1293
1294         rename_process_from_path(command->path);
1295
1296         /* We reset exactly these signals, since they are the
1297          * only ones we set to SIG_IGN in the main daemon. All
1298          * others we leave untouched because we set them to
1299          * SIG_DFL or a valid handler initially, both of which
1300          * will be demoted to SIG_DFL. */
1301         default_signals(SIGNALS_CRASH_HANDLER,
1302                         SIGNALS_IGNORE, -1);
1303
1304         if (context->ignore_sigpipe)
1305                 ignore_signals(SIGPIPE, -1);
1306
1307         r = reset_signal_mask();
1308         if (r < 0) {
1309                 *exit_status = EXIT_SIGNAL_MASK;
1310                 return r;
1311         }
1312
1313         if (params->idle_pipe)
1314                 do_idle_pipe_dance(params->idle_pipe);
1315
1316         /* Close sockets very early to make sure we don't
1317          * block init reexecution because it cannot bind its
1318          * sockets */
1319
1320         log_forget_fds();
1321
1322         if (socket_fd >= 0)
1323                 dont_close[n_dont_close++] = socket_fd;
1324         if (n_fds > 0) {
1325                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1326                 n_dont_close += n_fds;
1327         }
1328         if (params->bus_endpoint_fd >= 0)
1329                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1330         if (runtime) {
1331                 if (runtime->netns_storage_socket[0] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1333                 if (runtime->netns_storage_socket[1] >= 0)
1334                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1335         }
1336
1337         r = close_all_fds(dont_close, n_dont_close);
1338         if (r < 0) {
1339                 *exit_status = EXIT_FDS;
1340                 return r;
1341         }
1342
1343         if (!context->same_pgrp)
1344                 if (setsid() < 0) {
1345                         *exit_status = EXIT_SETSID;
1346                         return -errno;
1347                 }
1348
1349         exec_context_tty_reset(context);
1350
1351         if (params->confirm_spawn) {
1352                 char response;
1353
1354                 r = ask_for_confirmation(&response, argv);
1355                 if (r == -ETIMEDOUT)
1356                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1357                 else if (r < 0)
1358                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1359                 else if (response == 's') {
1360                         write_confirm_message("Skipping execution.\n");
1361                         *exit_status = EXIT_CONFIRM;
1362                         return -ECANCELED;
1363                 } else if (response == 'n') {
1364                         write_confirm_message("Failing execution.\n");
1365                         *exit_status = 0;
1366                         return 0;
1367                 }
1368         }
1369
1370         if (context->user) {
1371                 username = context->user;
1372                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1373                 if (r < 0) {
1374                         *exit_status = EXIT_USER;
1375                         return r;
1376                 }
1377         }
1378
1379         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1380          * must sure to drop O_NONBLOCK */
1381         if (socket_fd >= 0)
1382                 fd_nonblock(socket_fd, false);
1383
1384         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1385         if (r < 0) {
1386                 *exit_status = EXIT_STDIN;
1387                 return r;
1388         }
1389
1390         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1391         if (r < 0) {
1392                 *exit_status = EXIT_STDOUT;
1393                 return r;
1394         }
1395
1396         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1397         if (r < 0) {
1398                 *exit_status = EXIT_STDERR;
1399                 return r;
1400         }
1401
1402         if (params->cgroup_path) {
1403                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1404                 if (r < 0) {
1405                         *exit_status = EXIT_CGROUP;
1406                         return r;
1407                 }
1408         }
1409
1410         if (context->oom_score_adjust_set) {
1411                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1412
1413                 /* When we can't make this change due to EPERM, then
1414                  * let's silently skip over it. User namespaces
1415                  * prohibit write access to this file, and we
1416                  * shouldn't trip up over that. */
1417
1418                 sprintf(t, "%i", context->oom_score_adjust);
1419                 r = write_string_file("/proc/self/oom_score_adj", t);
1420                 if (r == -EPERM || r == -EACCES) {
1421                         log_open();
1422                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1423                         log_close();
1424                 } else if (r < 0) {
1425                         *exit_status = EXIT_OOM_ADJUST;
1426                         return -errno;
1427                 }
1428         }
1429
1430         if (context->nice_set)
1431                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1432                         *exit_status = EXIT_NICE;
1433                         return -errno;
1434                 }
1435
1436         if (context->cpu_sched_set) {
1437                 struct sched_param param = {
1438                         .sched_priority = context->cpu_sched_priority,
1439                 };
1440
1441                 r = sched_setscheduler(0,
1442                                        context->cpu_sched_policy |
1443                                        (context->cpu_sched_reset_on_fork ?
1444                                         SCHED_RESET_ON_FORK : 0),
1445                                        &param);
1446                 if (r < 0) {
1447                         *exit_status = EXIT_SETSCHEDULER;
1448                         return -errno;
1449                 }
1450         }
1451
1452         if (context->cpuset)
1453                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1454                         *exit_status = EXIT_CPUAFFINITY;
1455                         return -errno;
1456                 }
1457
1458         if (context->ioprio_set)
1459                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1460                         *exit_status = EXIT_IOPRIO;
1461                         return -errno;
1462                 }
1463
1464         if (context->timer_slack_nsec != NSEC_INFINITY)
1465                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1466                         *exit_status = EXIT_TIMERSLACK;
1467                         return -errno;
1468                 }
1469
1470         if (context->personality != 0xffffffffUL)
1471                 if (personality(context->personality) < 0) {
1472                         *exit_status = EXIT_PERSONALITY;
1473                         return -errno;
1474                 }
1475
1476         if (context->utmp_id)
1477                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1478
1479         if (context->user && is_terminal_input(context->std_input)) {
1480                 r = chown_terminal(STDIN_FILENO, uid);
1481                 if (r < 0) {
1482                         *exit_status = EXIT_STDIN;
1483                         return r;
1484                 }
1485         }
1486
1487 #ifdef ENABLE_KDBUS
1488         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1489                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1490
1491                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1492                 if (r < 0) {
1493                         *exit_status = EXIT_BUS_ENDPOINT;
1494                         return r;
1495                 }
1496         }
1497 #endif
1498
1499         /* If delegation is enabled we'll pass ownership of the cgroup
1500          * (but only in systemd's own controller hierarchy!) to the
1501          * user of the new process. */
1502         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1503                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1504                 if (r < 0) {
1505                         *exit_status = EXIT_CGROUP;
1506                         return r;
1507                 }
1508
1509
1510                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1511                 if (r < 0) {
1512                         *exit_status = EXIT_CGROUP;
1513                         return r;
1514                 }
1515         }
1516
1517         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1518                 char **rt;
1519
1520                 STRV_FOREACH(rt, context->runtime_directory) {
1521                         _cleanup_free_ char *p;
1522
1523                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1524                         if (!p) {
1525                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1526                                 return -ENOMEM;
1527                         }
1528
1529                         r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1530                         if (r < 0) {
1531                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1532                                 return r;
1533                         }
1534                 }
1535         }
1536
1537         if (params->apply_permissions) {
1538                 r = enforce_groups(context, username, gid);
1539                 if (r < 0) {
1540                         *exit_status = EXIT_GROUP;
1541                         return r;
1542                 }
1543         }
1544
1545         umask(context->umask);
1546
1547 #ifdef HAVE_PAM
1548         if (params->apply_permissions && context->pam_name && username) {
1549                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1550                 if (r < 0) {
1551                         *exit_status = EXIT_PAM;
1552                         return r;
1553                 }
1554         }
1555 #endif
1556
1557         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1558                 r = setup_netns(runtime->netns_storage_socket);
1559                 if (r < 0) {
1560                         *exit_status = EXIT_NETWORK;
1561                         return r;
1562                 }
1563         }
1564
1565         if (!strv_isempty(context->read_write_dirs) ||
1566             !strv_isempty(context->read_only_dirs) ||
1567             !strv_isempty(context->inaccessible_dirs) ||
1568             context->mount_flags != 0 ||
1569             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1570             params->bus_endpoint_path ||
1571             context->private_devices ||
1572             context->protect_system != PROTECT_SYSTEM_NO ||
1573             context->protect_home != PROTECT_HOME_NO) {
1574
1575                 char *tmp = NULL, *var = NULL;
1576
1577                 /* The runtime struct only contains the parent
1578                  * of the private /tmp, which is
1579                  * non-accessible to world users. Inside of it
1580                  * there's a /tmp that is sticky, and that's
1581                  * the one we want to use here. */
1582
1583                 if (context->private_tmp && runtime) {
1584                         if (runtime->tmp_dir)
1585                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1586                         if (runtime->var_tmp_dir)
1587                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1588                 }
1589
1590                 r = setup_namespace(
1591                                 context->read_write_dirs,
1592                                 context->read_only_dirs,
1593                                 context->inaccessible_dirs,
1594                                 tmp,
1595                                 var,
1596                                 params->bus_endpoint_path,
1597                                 context->private_devices,
1598                                 context->protect_home,
1599                                 context->protect_system,
1600                                 context->mount_flags);
1601
1602                 /* If we couldn't set up the namespace this is
1603                  * probably due to a missing capability. In this case,
1604                  * silently proceeed. */
1605                 if (r == -EPERM || r == -EACCES) {
1606                         log_open();
1607                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1608                         log_close();
1609                 } else if (r < 0) {
1610                         *exit_status = EXIT_NAMESPACE;
1611                         return r;
1612                 }
1613         }
1614
1615         if (params->apply_chroot) {
1616                 if (context->root_directory)
1617                         if (chroot(context->root_directory) < 0) {
1618                                 *exit_status = EXIT_CHROOT;
1619                                 return -errno;
1620                         }
1621
1622                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1623                         *exit_status = EXIT_CHDIR;
1624                         return -errno;
1625                 }
1626         } else {
1627                 _cleanup_free_ char *d = NULL;
1628
1629                 if (asprintf(&d, "%s/%s",
1630                              context->root_directory ? context->root_directory : "",
1631                              context->working_directory ? context->working_directory : "") < 0) {
1632                         *exit_status = EXIT_MEMORY;
1633                         return -ENOMEM;
1634                 }
1635
1636                 if (chdir(d) < 0) {
1637                         *exit_status = EXIT_CHDIR;
1638                         return -errno;
1639                 }
1640         }
1641
1642 #ifdef HAVE_SELINUX
1643         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1644                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1645                 if (r < 0) {
1646                         *exit_status = EXIT_SELINUX_CONTEXT;
1647                         return r;
1648                 }
1649         }
1650 #endif
1651
1652         /* We repeat the fd closing here, to make sure that
1653          * nothing is leaked from the PAM modules. Note that
1654          * we are more aggressive this time since socket_fd
1655          * and the netns fds we don't need anymore. The custom
1656          * endpoint fd was needed to upload the policy and can
1657          * now be closed as well. */
1658         r = close_all_fds(fds, n_fds);
1659         if (r >= 0)
1660                 r = shift_fds(fds, n_fds);
1661         if (r >= 0)
1662                 r = flags_fds(fds, n_fds, context->non_blocking);
1663         if (r < 0) {
1664                 *exit_status = EXIT_FDS;
1665                 return r;
1666         }
1667
1668         if (params->apply_permissions) {
1669
1670                 for (i = 0; i < _RLIMIT_MAX; i++) {
1671                         if (!context->rlimit[i])
1672                                 continue;
1673
1674                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1675                                 *exit_status = EXIT_LIMITS;
1676                                 return -errno;
1677                         }
1678                 }
1679
1680                 if (context->capability_bounding_set_drop) {
1681                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1682                         if (r < 0) {
1683                                 *exit_status = EXIT_CAPABILITIES;
1684                                 return r;
1685                         }
1686                 }
1687
1688 #ifdef HAVE_SMACK
1689                 if (context->smack_process_label) {
1690                         r = mac_smack_apply_pid(0, context->smack_process_label);
1691                         if (r < 0) {
1692                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1693                                 return r;
1694                         }
1695                 }
1696 #endif
1697
1698                 if (context->user) {
1699                         r = enforce_user(context, uid);
1700                         if (r < 0) {
1701                                 *exit_status = EXIT_USER;
1702                                 return r;
1703                         }
1704                 }
1705
1706                 /* PR_GET_SECUREBITS is not privileged, while
1707                  * PR_SET_SECUREBITS is. So to suppress
1708                  * potential EPERMs we'll try not to call
1709                  * PR_SET_SECUREBITS unless necessary. */
1710                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1711                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1712                                 *exit_status = EXIT_SECUREBITS;
1713                                 return -errno;
1714                         }
1715
1716                 if (context->capabilities)
1717                         if (cap_set_proc(context->capabilities) < 0) {
1718                                 *exit_status = EXIT_CAPABILITIES;
1719                                 return -errno;
1720                         }
1721
1722                 if (context->no_new_privileges)
1723                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1724                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1725                                 return -errno;
1726                         }
1727
1728 #ifdef HAVE_SECCOMP
1729                 if (context->address_families_whitelist ||
1730                     !set_isempty(context->address_families)) {
1731                         r = apply_address_families(context);
1732                         if (r < 0) {
1733                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1734                                 return r;
1735                         }
1736                 }
1737
1738                 if (context->syscall_whitelist ||
1739                     !set_isempty(context->syscall_filter) ||
1740                     !set_isempty(context->syscall_archs)) {
1741                         r = apply_seccomp(context);
1742                         if (r < 0) {
1743                                 *exit_status = EXIT_SECCOMP;
1744                                 return r;
1745                         }
1746                 }
1747 #endif
1748
1749 #ifdef HAVE_SELINUX
1750                 if (mac_selinux_use()) {
1751                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1752
1753                         if (exec_context) {
1754                                 r = setexeccon(exec_context);
1755                                 if (r < 0) {
1756                                         *exit_status = EXIT_SELINUX_CONTEXT;
1757                                         return r;
1758                                 }
1759                         }
1760                 }
1761 #endif
1762
1763 #ifdef HAVE_APPARMOR
1764                 if (context->apparmor_profile && mac_apparmor_use()) {
1765                         r = aa_change_onexec(context->apparmor_profile);
1766                         if (r < 0 && !context->apparmor_profile_ignore) {
1767                                 *exit_status = EXIT_APPARMOR_PROFILE;
1768                                 return -errno;
1769                         }
1770                 }
1771 #endif
1772         }
1773
1774         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1775         if (r < 0) {
1776                 *exit_status = EXIT_MEMORY;
1777                 return r;
1778         }
1779
1780         final_env = strv_env_merge(5,
1781                                    params->environment,
1782                                    our_env,
1783                                    context->environment,
1784                                    files_env,
1785                                    pam_env,
1786                                    NULL);
1787         if (!final_env) {
1788                 *exit_status = EXIT_MEMORY;
1789                 return -ENOMEM;
1790         }
1791
1792         final_argv = replace_env_argv(argv, final_env);
1793         if (!final_argv) {
1794                 *exit_status = EXIT_MEMORY;
1795                 return -ENOMEM;
1796         }
1797
1798         final_env = strv_env_clean(final_env);
1799
1800         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1801                 _cleanup_free_ char *line;
1802
1803                 line = exec_command_line(final_argv);
1804                 if (line) {
1805                         log_open();
1806                         log_unit_struct(params->unit_id,
1807                                         LOG_DEBUG,
1808                                         "EXECUTABLE=%s", command->path,
1809                                         LOG_MESSAGE("Executing: %s", line),
1810                                         NULL);
1811                         log_close();
1812                 }
1813         }
1814         execve(command->path, final_argv, final_env);
1815         *exit_status = EXIT_EXEC;
1816         return -errno;
1817 }
1818
1819 int exec_spawn(ExecCommand *command,
1820                const ExecContext *context,
1821                const ExecParameters *params,
1822                ExecRuntime *runtime,
1823                pid_t *ret) {
1824
1825         _cleanup_strv_free_ char **files_env = NULL;
1826         int *fds = NULL; unsigned n_fds = 0;
1827         _cleanup_free_ char *line = NULL;
1828         int socket_fd, r;
1829         char **argv;
1830         pid_t pid;
1831
1832         assert(command);
1833         assert(context);
1834         assert(ret);
1835         assert(params);
1836         assert(params->fds || params->n_fds <= 0);
1837
1838         if (context->std_input == EXEC_INPUT_SOCKET ||
1839             context->std_output == EXEC_OUTPUT_SOCKET ||
1840             context->std_error == EXEC_OUTPUT_SOCKET) {
1841
1842                 if (params->n_fds != 1) {
1843                         log_unit_error(params->unit_id, "Got more than one socket.");
1844                         return -EINVAL;
1845                 }
1846
1847                 socket_fd = params->fds[0];
1848         } else {
1849                 socket_fd = -1;
1850                 fds = params->fds;
1851                 n_fds = params->n_fds;
1852         }
1853
1854         r = exec_context_load_environment(context, params->unit_id, &files_env);
1855         if (r < 0)
1856                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1857
1858         argv = params->argv ?: command->argv;
1859         line = exec_command_line(argv);
1860         if (!line)
1861                 return log_oom();
1862
1863         log_unit_struct(params->unit_id,
1864                         LOG_DEBUG,
1865                         "EXECUTABLE=%s", command->path,
1866                         LOG_MESSAGE("About to execute: %s", line),
1867                         NULL);
1868         pid = fork();
1869         if (pid < 0)
1870                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1871
1872         if (pid == 0) {
1873                 int exit_status;
1874
1875                 r = exec_child(command,
1876                                context,
1877                                params,
1878                                runtime,
1879                                argv,
1880                                socket_fd,
1881                                fds, n_fds,
1882                                files_env,
1883                                &exit_status);
1884                 if (r < 0) {
1885                         log_open();
1886                         log_unit_struct(params->unit_id,
1887                                         LOG_ERR,
1888                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1889                                         "EXECUTABLE=%s", command->path,
1890                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1891                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1892                                                     command->path, strerror(-r)),
1893                                         LOG_ERRNO(r),
1894                                         NULL);
1895                 }
1896
1897                 _exit(exit_status);
1898         }
1899
1900         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1901
1902         /* We add the new process to the cgroup both in the child (so
1903          * that we can be sure that no user code is ever executed
1904          * outside of the cgroup) and in the parent (so that we can be
1905          * sure that when we kill the cgroup the process will be
1906          * killed too). */
1907         if (params->cgroup_path)
1908                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1909
1910         exec_status_start(&command->exec_status, pid);
1911
1912         *ret = pid;
1913         return 0;
1914 }
1915
1916 void exec_context_init(ExecContext *c) {
1917         assert(c);
1918
1919         c->umask = 0022;
1920         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1921         c->cpu_sched_policy = SCHED_OTHER;
1922         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1923         c->syslog_level_prefix = true;
1924         c->ignore_sigpipe = true;
1925         c->timer_slack_nsec = NSEC_INFINITY;
1926         c->personality = 0xffffffffUL;
1927         c->runtime_directory_mode = 0755;
1928 }
1929
1930 void exec_context_done(ExecContext *c) {
1931         unsigned l;
1932
1933         assert(c);
1934
1935         strv_free(c->environment);
1936         c->environment = NULL;
1937
1938         strv_free(c->environment_files);
1939         c->environment_files = NULL;
1940
1941         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1942                 free(c->rlimit[l]);
1943                 c->rlimit[l] = NULL;
1944         }
1945
1946         free(c->working_directory);
1947         c->working_directory = NULL;
1948         free(c->root_directory);
1949         c->root_directory = NULL;
1950
1951         free(c->tty_path);
1952         c->tty_path = NULL;
1953
1954         free(c->syslog_identifier);
1955         c->syslog_identifier = NULL;
1956
1957         free(c->user);
1958         c->user = NULL;
1959
1960         free(c->group);
1961         c->group = NULL;
1962
1963         strv_free(c->supplementary_groups);
1964         c->supplementary_groups = NULL;
1965
1966         free(c->pam_name);
1967         c->pam_name = NULL;
1968
1969         if (c->capabilities) {
1970                 cap_free(c->capabilities);
1971                 c->capabilities = NULL;
1972         }
1973
1974         strv_free(c->read_only_dirs);
1975         c->read_only_dirs = NULL;
1976
1977         strv_free(c->read_write_dirs);
1978         c->read_write_dirs = NULL;
1979
1980         strv_free(c->inaccessible_dirs);
1981         c->inaccessible_dirs = NULL;
1982
1983         if (c->cpuset)
1984                 CPU_FREE(c->cpuset);
1985
1986         free(c->utmp_id);
1987         c->utmp_id = NULL;
1988
1989         free(c->selinux_context);
1990         c->selinux_context = NULL;
1991
1992         free(c->apparmor_profile);
1993         c->apparmor_profile = NULL;
1994
1995         set_free(c->syscall_filter);
1996         c->syscall_filter = NULL;
1997
1998         set_free(c->syscall_archs);
1999         c->syscall_archs = NULL;
2000
2001         set_free(c->address_families);
2002         c->address_families = NULL;
2003
2004         strv_free(c->runtime_directory);
2005         c->runtime_directory = NULL;
2006
2007         bus_endpoint_free(c->bus_endpoint);
2008         c->bus_endpoint = NULL;
2009 }
2010
2011 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2012         char **i;
2013
2014         assert(c);
2015
2016         if (!runtime_prefix)
2017                 return 0;
2018
2019         STRV_FOREACH(i, c->runtime_directory) {
2020                 _cleanup_free_ char *p;
2021
2022                 p = strjoin(runtime_prefix, "/", *i, NULL);
2023                 if (!p)
2024                         return -ENOMEM;
2025
2026                 /* We execute this synchronously, since we need to be
2027                  * sure this is gone when we start the service
2028                  * next. */
2029                 rm_rf(p, false, true, false);
2030         }
2031
2032         return 0;
2033 }
2034
2035 void exec_command_done(ExecCommand *c) {
2036         assert(c);
2037
2038         free(c->path);
2039         c->path = NULL;
2040
2041         strv_free(c->argv);
2042         c->argv = NULL;
2043 }
2044
2045 void exec_command_done_array(ExecCommand *c, unsigned n) {
2046         unsigned i;
2047
2048         for (i = 0; i < n; i++)
2049                 exec_command_done(c+i);
2050 }
2051
2052 ExecCommand* exec_command_free_list(ExecCommand *c) {
2053         ExecCommand *i;
2054
2055         while ((i = c)) {
2056                 LIST_REMOVE(command, c, i);
2057                 exec_command_done(i);
2058                 free(i);
2059         }
2060
2061         return NULL;
2062 }
2063
2064 void exec_command_free_array(ExecCommand **c, unsigned n) {
2065         unsigned i;
2066
2067         for (i = 0; i < n; i++)
2068                 c[i] = exec_command_free_list(c[i]);
2069 }
2070
/* Userdata passed to invalid_env(): identifies the unit and the
 * environment file an invalid assignment was found in. */
typedef struct InvalidEnvInfo InvalidEnvInfo;

struct InvalidEnvInfo {
        const char *unit_id;    /* unit the message is logged for */
        const char *path;       /* file the assignment was read from */
};
2075
2076 static void invalid_env(const char *p, void *userdata) {
2077         InvalidEnvInfo *info = userdata;
2078
2079         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2080 }
2081
2082 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2083         char **i, **r = NULL;
2084
2085         assert(c);
2086         assert(l);
2087
2088         STRV_FOREACH(i, c->environment_files) {
2089                 char *fn;
2090                 int k;
2091                 bool ignore = false;
2092                 char **p;
2093                 _cleanup_globfree_ glob_t pglob = {};
2094                 int count, n;
2095
2096                 fn = *i;
2097
2098                 if (fn[0] == '-') {
2099                         ignore = true;
2100                         fn ++;
2101                 }
2102
2103                 if (!path_is_absolute(fn)) {
2104                         if (ignore)
2105                                 continue;
2106
2107                         strv_free(r);
2108                         return -EINVAL;
2109                 }
2110
2111                 /* Filename supports globbing, take all matching files */
2112                 errno = 0;
2113                 if (glob(fn, 0, NULL, &pglob) != 0) {
2114                         if (ignore)
2115                                 continue;
2116
2117                         strv_free(r);
2118                         return errno ? -errno : -EINVAL;
2119                 }
2120                 count = pglob.gl_pathc;
2121                 if (count == 0) {
2122                         if (ignore)
2123                                 continue;
2124
2125                         strv_free(r);
2126                         return -EINVAL;
2127                 }
2128                 for (n = 0; n < count; n++) {
2129                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2130                         if (k < 0) {
2131                                 if (ignore)
2132                                         continue;
2133
2134                                 strv_free(r);
2135                                 return k;
2136                         }
2137                         /* Log invalid environment variables with filename */
2138                         if (p) {
2139                                 InvalidEnvInfo info = {
2140                                         .unit_id = unit_id,
2141                                         .path = pglob.gl_pathv[n]
2142                                 };
2143
2144                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2145                         }
2146
2147                         if (r == NULL)
2148                                 r = p;
2149                         else {
2150                                 char **m;
2151
2152                                 m = strv_env_merge(2, r, p);
2153                                 strv_free(r);
2154                                 strv_free(p);
2155                                 if (!m)
2156                                         return -ENOMEM;
2157
2158                                 r = m;
2159                         }
2160                 }
2161         }
2162
2163         *l = r;
2164
2165         return 0;
2166 }
2167
2168 static bool tty_may_match_dev_console(const char *tty) {
2169         _cleanup_free_ char *active = NULL;
2170        char *console;
2171
2172         if (startswith(tty, "/dev/"))
2173                 tty += 5;
2174
2175         /* trivial identity? */
2176         if (streq(tty, "console"))
2177                 return true;
2178
2179         console = resolve_dev_console(&active);
2180         /* if we could not resolve, assume it may */
2181         if (!console)
2182                 return true;
2183
2184         /* "tty0" means the active VC, so it may be the same sometimes */
2185         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2186 }
2187
2188 bool exec_context_may_touch_console(ExecContext *ec) {
2189         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2190                 is_terminal_input(ec->std_input) ||
2191                 is_terminal_output(ec->std_output) ||
2192                 is_terminal_output(ec->std_error)) &&
2193                tty_may_match_dev_console(tty_path(ec));
2194 }
2195
2196 static void strv_fprintf(FILE *f, char **l) {
2197         char **g;
2198
2199         assert(f);
2200
2201         STRV_FOREACH(g, l)
2202                 fprintf(f, " %s", *g);
2203 }
2204
2205 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2206         char **e;
2207         unsigned i;
2208
2209         assert(c);
2210         assert(f);
2211
2212         prefix = strempty(prefix);
2213
2214         fprintf(f,
2215                 "%sUMask: %04o\n"
2216                 "%sWorkingDirectory: %s\n"
2217                 "%sRootDirectory: %s\n"
2218                 "%sNonBlocking: %s\n"
2219                 "%sPrivateTmp: %s\n"
2220                 "%sPrivateNetwork: %s\n"
2221                 "%sPrivateDevices: %s\n"
2222                 "%sProtectHome: %s\n"
2223                 "%sProtectSystem: %s\n"
2224                 "%sIgnoreSIGPIPE: %s\n",
2225                 prefix, c->umask,
2226                 prefix, c->working_directory ? c->working_directory : "/",
2227                 prefix, c->root_directory ? c->root_directory : "/",
2228                 prefix, yes_no(c->non_blocking),
2229                 prefix, yes_no(c->private_tmp),
2230                 prefix, yes_no(c->private_network),
2231                 prefix, yes_no(c->private_devices),
2232                 prefix, protect_home_to_string(c->protect_home),
2233                 prefix, protect_system_to_string(c->protect_system),
2234                 prefix, yes_no(c->ignore_sigpipe));
2235
2236         STRV_FOREACH(e, c->environment)
2237                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2238
2239         STRV_FOREACH(e, c->environment_files)
2240                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2241
2242         if (c->nice_set)
2243                 fprintf(f,
2244                         "%sNice: %i\n",
2245                         prefix, c->nice);
2246
2247         if (c->oom_score_adjust_set)
2248                 fprintf(f,
2249                         "%sOOMScoreAdjust: %i\n",
2250                         prefix, c->oom_score_adjust);
2251
2252         for (i = 0; i < RLIM_NLIMITS; i++)
2253                 if (c->rlimit[i])
2254                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2255                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2256
2257         if (c->ioprio_set) {
2258                 _cleanup_free_ char *class_str = NULL;
2259
2260                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2261                 fprintf(f,
2262                         "%sIOSchedulingClass: %s\n"
2263                         "%sIOPriority: %i\n",
2264                         prefix, strna(class_str),
2265                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2266         }
2267
2268         if (c->cpu_sched_set) {
2269                 _cleanup_free_ char *policy_str = NULL;
2270
2271                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2272                 fprintf(f,
2273                         "%sCPUSchedulingPolicy: %s\n"
2274                         "%sCPUSchedulingPriority: %i\n"
2275                         "%sCPUSchedulingResetOnFork: %s\n",
2276                         prefix, strna(policy_str),
2277                         prefix, c->cpu_sched_priority,
2278                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2279         }
2280
2281         if (c->cpuset) {
2282                 fprintf(f, "%sCPUAffinity:", prefix);
2283                 for (i = 0; i < c->cpuset_ncpus; i++)
2284                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2285                                 fprintf(f, " %u", i);
2286                 fputs("\n", f);
2287         }
2288
2289         if (c->timer_slack_nsec != NSEC_INFINITY)
2290                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2291
2292         fprintf(f,
2293                 "%sStandardInput: %s\n"
2294                 "%sStandardOutput: %s\n"
2295                 "%sStandardError: %s\n",
2296                 prefix, exec_input_to_string(c->std_input),
2297                 prefix, exec_output_to_string(c->std_output),
2298                 prefix, exec_output_to_string(c->std_error));
2299
2300         if (c->tty_path)
2301                 fprintf(f,
2302                         "%sTTYPath: %s\n"
2303                         "%sTTYReset: %s\n"
2304                         "%sTTYVHangup: %s\n"
2305                         "%sTTYVTDisallocate: %s\n",
2306                         prefix, c->tty_path,
2307                         prefix, yes_no(c->tty_reset),
2308                         prefix, yes_no(c->tty_vhangup),
2309                         prefix, yes_no(c->tty_vt_disallocate));
2310
2311         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2312             c->std_output == EXEC_OUTPUT_KMSG ||
2313             c->std_output == EXEC_OUTPUT_JOURNAL ||
2314             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2317             c->std_error == EXEC_OUTPUT_SYSLOG ||
2318             c->std_error == EXEC_OUTPUT_KMSG ||
2319             c->std_error == EXEC_OUTPUT_JOURNAL ||
2320             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2321             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2322             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2323
2324                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2325
2326                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2327                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2328
2329                 fprintf(f,
2330                         "%sSyslogFacility: %s\n"
2331                         "%sSyslogLevel: %s\n",
2332                         prefix, strna(fac_str),
2333                         prefix, strna(lvl_str));
2334         }
2335
2336         if (c->capabilities) {
2337                 _cleanup_cap_free_charp_ char *t;
2338
2339                 t = cap_to_text(c->capabilities, NULL);
2340                 if (t)
2341                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2342         }
2343
2344         if (c->secure_bits)
2345                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2346                         prefix,
2347                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2348                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2349                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2350                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2351                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2352                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2353
2354         if (c->capability_bounding_set_drop) {
2355                 unsigned long l;
2356                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2357
2358                 for (l = 0; l <= cap_last_cap(); l++)
2359                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2360                                 fprintf(f, " %s", strna(capability_to_name(l)));
2361
2362                 fputs("\n", f);
2363         }
2364
2365         if (c->user)
2366                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2367         if (c->group)
2368                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2369
2370         if (strv_length(c->supplementary_groups) > 0) {
2371                 fprintf(f, "%sSupplementaryGroups:", prefix);
2372                 strv_fprintf(f, c->supplementary_groups);
2373                 fputs("\n", f);
2374         }
2375
2376         if (c->pam_name)
2377                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2378
2379         if (strv_length(c->read_write_dirs) > 0) {
2380                 fprintf(f, "%sReadWriteDirs:", prefix);
2381                 strv_fprintf(f, c->read_write_dirs);
2382                 fputs("\n", f);
2383         }
2384
2385         if (strv_length(c->read_only_dirs) > 0) {
2386                 fprintf(f, "%sReadOnlyDirs:", prefix);
2387                 strv_fprintf(f, c->read_only_dirs);
2388                 fputs("\n", f);
2389         }
2390
2391         if (strv_length(c->inaccessible_dirs) > 0) {
2392                 fprintf(f, "%sInaccessibleDirs:", prefix);
2393                 strv_fprintf(f, c->inaccessible_dirs);
2394                 fputs("\n", f);
2395         }
2396
2397         if (c->utmp_id)
2398                 fprintf(f,
2399                         "%sUtmpIdentifier: %s\n",
2400                         prefix, c->utmp_id);
2401
2402         if (c->selinux_context)
2403                 fprintf(f,
2404                         "%sSELinuxContext: %s%s\n",
2405                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2406
2407         if (c->personality != 0xffffffffUL)
2408                 fprintf(f,
2409                         "%sPersonality: %s\n",
2410                         prefix, strna(personality_to_string(c->personality)));
2411
2412         if (c->syscall_filter) {
2413 #ifdef HAVE_SECCOMP
2414                 Iterator j;
2415                 void *id;
2416                 bool first = true;
2417 #endif
2418
2419                 fprintf(f,
2420                         "%sSystemCallFilter: ",
2421                         prefix);
2422
2423                 if (!c->syscall_whitelist)
2424                         fputc('~', f);
2425
2426 #ifdef HAVE_SECCOMP
2427                 SET_FOREACH(id, c->syscall_filter, j) {
2428                         _cleanup_free_ char *name = NULL;
2429
2430                         if (first)
2431                                 first = false;
2432                         else
2433                                 fputc(' ', f);
2434
2435                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2436                         fputs(strna(name), f);
2437                 }
2438 #endif
2439
2440                 fputc('\n', f);
2441         }
2442
2443         if (c->syscall_archs) {
2444 #ifdef HAVE_SECCOMP
2445                 Iterator j;
2446                 void *id;
2447 #endif
2448
2449                 fprintf(f,
2450                         "%sSystemCallArchitectures:",
2451                         prefix);
2452
2453 #ifdef HAVE_SECCOMP
2454                 SET_FOREACH(id, c->syscall_archs, j)
2455                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2456 #endif
2457                 fputc('\n', f);
2458         }
2459
2460         if (c->syscall_errno != 0)
2461                 fprintf(f,
2462                         "%sSystemCallErrorNumber: %s\n",
2463                         prefix, strna(errno_to_name(c->syscall_errno)));
2464
2465         if (c->apparmor_profile)
2466                 fprintf(f,
2467                         "%sAppArmorProfile: %s%s\n",
2468                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2469 }
2470
2471 bool exec_context_maintains_privileges(ExecContext *c) {
2472         assert(c);
2473
2474         /* Returns true if the process forked off would run run under
2475          * an unchanged UID or as root. */
2476
2477         if (!c->user)
2478                 return true;
2479
2480         if (streq(c->user, "root") || streq(c->user, "0"))
2481                 return true;
2482
2483         return false;
2484 }
2485
2486 void exec_status_start(ExecStatus *s, pid_t pid) {
2487         assert(s);
2488
2489         zero(*s);
2490         s->pid = pid;
2491         dual_timestamp_get(&s->start_timestamp);
2492 }
2493
2494 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2495         assert(s);
2496
2497         if (s->pid && s->pid != pid)
2498                 zero(*s);
2499
2500         s->pid = pid;
2501         dual_timestamp_get(&s->exit_timestamp);
2502
2503         s->code = code;
2504         s->status = status;
2505
2506         if (context) {
2507                 if (context->utmp_id)
2508                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2509
2510                 exec_context_tty_reset(context);
2511         }
2512 }
2513
2514 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2515         char buf[FORMAT_TIMESTAMP_MAX];
2516
2517         assert(s);
2518         assert(f);
2519
2520         if (s->pid <= 0)
2521                 return;
2522
2523         prefix = strempty(prefix);
2524
2525         fprintf(f,
2526                 "%sPID: "PID_FMT"\n",
2527                 prefix, s->pid);
2528
2529         if (s->start_timestamp.realtime > 0)
2530                 fprintf(f,
2531                         "%sStart Timestamp: %s\n",
2532                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2533
2534         if (s->exit_timestamp.realtime > 0)
2535                 fprintf(f,
2536                         "%sExit Timestamp: %s\n"
2537                         "%sExit Code: %s\n"
2538                         "%sExit Status: %i\n",
2539                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2540                         prefix, sigchld_code_to_string(s->code),
2541                         prefix, s->status);
2542 }
2543
2544 char *exec_command_line(char **argv) {
2545         size_t k;
2546         char *n, *p, **a;
2547         bool first = true;
2548
2549         assert(argv);
2550
2551         k = 1;
2552         STRV_FOREACH(a, argv)
2553                 k += strlen(*a)+3;
2554
2555         if (!(n = new(char, k)))
2556                 return NULL;
2557
2558         p = n;
2559         STRV_FOREACH(a, argv) {
2560
2561                 if (!first)
2562                         *(p++) = ' ';
2563                 else
2564                         first = false;
2565
2566                 if (strpbrk(*a, WHITESPACE)) {
2567                         *(p++) = '\'';
2568                         p = stpcpy(p, *a);
2569                         *(p++) = '\'';
2570                 } else
2571                         p = stpcpy(p, *a);
2572
2573         }
2574
2575         *p = 0;
2576
2577         /* FIXME: this doesn't really handle arguments that have
2578          * spaces and ticks in them */
2579
2580         return n;
2581 }
2582
2583 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2584         _cleanup_free_ char *cmd = NULL;
2585         const char *prefix2;
2586
2587         assert(c);
2588         assert(f);
2589
2590         prefix = strempty(prefix);
2591         prefix2 = strjoina(prefix, "\t");
2592
2593         cmd = exec_command_line(c->argv);
2594         fprintf(f,
2595                 "%sCommand Line: %s\n",
2596                 prefix, cmd ? cmd : strerror(ENOMEM));
2597
2598         exec_status_dump(&c->exec_status, f, prefix2);
2599 }
2600
2601 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2602         assert(f);
2603
2604         prefix = strempty(prefix);
2605
2606         LIST_FOREACH(command, c, c)
2607                 exec_command_dump(c, f, prefix);
2608 }
2609
2610 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2611         ExecCommand *end;
2612
2613         assert(l);
2614         assert(e);
2615
2616         if (*l) {
2617                 /* It's kind of important, that we keep the order here */
2618                 LIST_FIND_TAIL(command, *l, end);
2619                 LIST_INSERT_AFTER(command, *l, end, e);
2620         } else
2621               *l = e;
2622 }
2623
2624 int exec_command_set(ExecCommand *c, const char *path, ...) {
2625         va_list ap;
2626         char **l, *p;
2627
2628         assert(c);
2629         assert(path);
2630
2631         va_start(ap, path);
2632         l = strv_new_ap(path, ap);
2633         va_end(ap);
2634
2635         if (!l)
2636                 return -ENOMEM;
2637
2638         p = strdup(path);
2639         if (!p) {
2640                 strv_free(l);
2641                 return -ENOMEM;
2642         }
2643
2644         free(c->path);
2645         c->path = p;
2646
2647         strv_free(c->argv);
2648         c->argv = l;
2649
2650         return 0;
2651 }
2652
2653 int exec_command_append(ExecCommand *c, const char *path, ...) {
2654         _cleanup_strv_free_ char **l = NULL;
2655         va_list ap;
2656         int r;
2657
2658         assert(c);
2659         assert(path);
2660
2661         va_start(ap, path);
2662         l = strv_new_ap(path, ap);
2663         va_end(ap);
2664
2665         if (!l)
2666                 return -ENOMEM;
2667
2668         r = strv_extend_strv(&c->argv, l);
2669         if (r < 0)
2670                 return r;
2671
2672         return 0;
2673 }
2674
2675
2676 static int exec_runtime_allocate(ExecRuntime **rt) {
2677
2678         if (*rt)
2679                 return 0;
2680
2681         *rt = new0(ExecRuntime, 1);
2682         if (!*rt)
2683                 return -ENOMEM;
2684
2685         (*rt)->n_ref = 1;
2686         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2687
2688         return 0;
2689 }
2690
2691 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2692         int r;
2693
2694         assert(rt);
2695         assert(c);
2696         assert(id);
2697
2698         if (*rt)
2699                 return 1;
2700
2701         if (!c->private_network && !c->private_tmp)
2702                 return 0;
2703
2704         r = exec_runtime_allocate(rt);
2705         if (r < 0)
2706                 return r;
2707
2708         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2709                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2710                         return -errno;
2711         }
2712
2713         if (c->private_tmp && !(*rt)->tmp_dir) {
2714                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2715                 if (r < 0)
2716                         return r;
2717         }
2718
2719         return 1;
2720 }
2721
2722 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2723         assert(r);
2724         assert(r->n_ref > 0);
2725
2726         r->n_ref++;
2727         return r;
2728 }
2729
2730 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2731
2732         if (!r)
2733                 return NULL;
2734
2735         assert(r->n_ref > 0);
2736
2737         r->n_ref--;
2738         if (r->n_ref <= 0) {
2739                 free(r->tmp_dir);
2740                 free(r->var_tmp_dir);
2741                 safe_close_pair(r->netns_storage_socket);
2742                 free(r);
2743         }
2744
2745         return NULL;
2746 }
2747
2748 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2749         assert(u);
2750         assert(f);
2751         assert(fds);
2752
2753         if (!rt)
2754                 return 0;
2755
2756         if (rt->tmp_dir)
2757                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2758
2759         if (rt->var_tmp_dir)
2760                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2761
2762         if (rt->netns_storage_socket[0] >= 0) {
2763                 int copy;
2764
2765                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2766                 if (copy < 0)
2767                         return copy;
2768
2769                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2770         }
2771
2772         if (rt->netns_storage_socket[1] >= 0) {
2773                 int copy;
2774
2775                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2776                 if (copy < 0)
2777                         return copy;
2778
2779                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2780         }
2781
2782         return 0;
2783 }
2784
2785 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2786         int r;
2787
2788         assert(rt);
2789         assert(key);
2790         assert(value);
2791
2792         if (streq(key, "tmp-dir")) {
2793                 char *copy;
2794
2795                 r = exec_runtime_allocate(rt);
2796                 if (r < 0)
2797                         return r;
2798
2799                 copy = strdup(value);
2800                 if (!copy)
2801                         return log_oom();
2802
2803                 free((*rt)->tmp_dir);
2804                 (*rt)->tmp_dir = copy;
2805
2806         } else if (streq(key, "var-tmp-dir")) {
2807                 char *copy;
2808
2809                 r = exec_runtime_allocate(rt);
2810                 if (r < 0)
2811                         return r;
2812
2813                 copy = strdup(value);
2814                 if (!copy)
2815                         return log_oom();
2816
2817                 free((*rt)->var_tmp_dir);
2818                 (*rt)->var_tmp_dir = copy;
2819
2820         } else if (streq(key, "netns-socket-0")) {
2821                 int fd;
2822
2823                 r = exec_runtime_allocate(rt);
2824                 if (r < 0)
2825                         return r;
2826
2827                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2828                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2829                 else {
2830                         safe_close((*rt)->netns_storage_socket[0]);
2831                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2832                 }
2833         } else if (streq(key, "netns-socket-1")) {
2834                 int fd;
2835
2836                 r = exec_runtime_allocate(rt);
2837                 if (r < 0)
2838                         return r;
2839
2840                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2841                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2842                 else {
2843                         safe_close((*rt)->netns_storage_socket[1]);
2844                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2845                 }
2846         } else
2847                 return 0;
2848
2849         return 1;
2850 }
2851
/* Thread entry point: runs rm_rf_dangerous() on the path passed in p. The
 * function takes ownership of the string and frees it when done. Always
 * returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2858
2859 void exec_runtime_destroy(ExecRuntime *rt) {
2860         int r;
2861
2862         if (!rt)
2863                 return;
2864
2865         /* If there are multiple users of this, let's leave the stuff around */
2866         if (rt->n_ref > 1)
2867                 return;
2868
2869         if (rt->tmp_dir) {
2870                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2871
2872                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2873                 if (r < 0) {
2874                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2875                         free(rt->tmp_dir);
2876                 }
2877
2878                 rt->tmp_dir = NULL;
2879         }
2880
2881         if (rt->var_tmp_dir) {
2882                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2883
2884                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2885                 if (r < 0) {
2886                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2887                         free(rt->var_tmp_dir);
2888                 }
2889
2890                 rt->var_tmp_dir = NULL;
2891         }
2892
2893         safe_close_pair(rt->netns_storage_socket);
2894 }
2895
2896 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2897         [EXEC_INPUT_NULL] = "null",
2898         [EXEC_INPUT_TTY] = "tty",
2899         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2900         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2901         [EXEC_INPUT_SOCKET] = "socket"
2902 };
2903
2904 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2905
2906 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2907         [EXEC_OUTPUT_INHERIT] = "inherit",
2908         [EXEC_OUTPUT_NULL] = "null",
2909         [EXEC_OUTPUT_TTY] = "tty",
2910         [EXEC_OUTPUT_SYSLOG] = "syslog",
2911         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2912         [EXEC_OUTPUT_KMSG] = "kmsg",
2913         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2914         [EXEC_OUTPUT_JOURNAL] = "journal",
2915         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2916         [EXEC_OUTPUT_SOCKET] = "socket"
2917 };
2918
2919 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);