chiark / gitweb /
7b071e8c9e2a41045f93e472817cce50c005cae9
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #ifdef HAVE_SELINUX
49 #include <selinux/selinux.h>
50 #endif
51
52 #ifdef HAVE_SECCOMP
53 #include <seccomp.h>
54 #endif
55
56 #ifdef HAVE_APPARMOR
57 #include <sys/apparmor.h>
58 #endif
59
60 #include "execute.h"
61 #include "strv.h"
62 #include "macro.h"
63 #include "capability.h"
64 #include "util.h"
65 #include "log.h"
66 #include "sd-messages.h"
67 #include "ioprio.h"
68 #include "securebits.h"
69 #include "namespace.h"
70 #include "exit-status.h"
71 #include "missing.h"
72 #include "utmp-wtmp.h"
73 #include "def.h"
74 #include "path-util.h"
75 #include "env-util.h"
76 #include "fileio.h"
77 #include "unit.h"
78 #include "async.h"
79 #include "selinux-util.h"
80 #include "errno-list.h"
81 #include "af-list.h"
82 #include "mkdir.h"
83 #include "apparmor-util.h"
84 #include "smack-util.h"
85 #include "bus-endpoint.h"
86 #include "label.h"
87 #include "cap-list.h"
88
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92
/* Two timeouts of five and one second(s), expressed in microseconds.
 * NOTE(review): neither is referenced in the part of the file visible here;
 * presumably they bound some idle wait further down -- confirm. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode applied to the terminal by chown_terminal(): read/write for
 * the owner, write-only for the group. This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the logger socket in
 * connect_logger_as(). */
#define SNDBUF_SIZE (8 * 1024 * 1024)
100
/* Moves the n_fds descriptors in fds[] so that fds[i] ends up being
 * descriptor number i+3. The array is modified in place. Returns 0 on
 * success or a negative errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dupfd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dupfd = fcntl(fds[idx], F_DUPFD, target);
                        if (dupfd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dupfd;

                        /* The slot we asked for was still occupied, so we
                         * got a higher fd. Remember the first such index
                         * and make another pass starting from it. */
                        if (dupfd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
144
/* Sets or clears O_NONBLOCK (according to 'nonblock') and clears FD_CLOEXEC
 * on each of the n_fds descriptors. Stops at the first failure and returns
 * that helper's negative result, otherwise returns 0. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * handed over to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
171
172 _pure_ static const char *tty_path(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_path)
176                 return context->tty_path;
177
178         return "/dev/console";
179 }
180
181 static void exec_context_tty_reset(const ExecContext *context) {
182         assert(context);
183
184         if (context->tty_vhangup)
185                 terminal_vhangup(tty_path(context));
186
187         if (context->tty_reset)
188                 reset_terminal(tty_path(context));
189
190         if (context->tty_vt_disallocate && context->tty_path)
191                 vt_disallocate(context->tty_path);
192 }
193
194 static bool is_terminal_output(ExecOutput o) {
195         return
196                 o == EXEC_OUTPUT_TTY ||
197                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 }
201
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno on
 * failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We might have been handed exactly the descriptor we wanted */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(null_fd);
        return r;
}
219
220 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225         uid_t olduid = UID_INVALID;
226         gid_t oldgid = GID_INVALID;
227         int r;
228
229         if (gid != GID_INVALID) {
230                 oldgid = getgid();
231
232                 r = setegid(gid);
233                 if (r < 0)
234                         return -errno;
235         }
236
237         if (uid != UID_INVALID) {
238                 olduid = getuid();
239
240                 r = seteuid(uid);
241                 if (r < 0) {
242                         r = -errno;
243                         goto restore_gid;
244                 }
245         }
246
247         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
248         if (r < 0)
249                 r = -errno;
250
251         /* If we fail to restore the uid or gid, things will likely
252            fail later on. This should only happen if an LSM interferes. */
253
254         if (uid != UID_INVALID)
255                 (void) seteuid(olduid);
256
257  restore_gid:
258         if (gid != GID_INVALID)
259                 (void) setegid(oldgid);
260
261         return r;
262 }
263
264 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
265         int fd, r;
266
267         assert(context);
268         assert(output < _EXEC_OUTPUT_MAX);
269         assert(ident);
270         assert(nfd >= 0);
271
272         fd = socket(AF_UNIX, SOCK_STREAM, 0);
273         if (fd < 0)
274                 return -errno;
275
276         r = connect_journal_socket(fd, uid, gid);
277         if (r < 0)
278                 return r;
279
280         if (shutdown(fd, SHUT_RD) < 0) {
281                 safe_close(fd);
282                 return -errno;
283         }
284
285         fd_inc_sndbuf(fd, SNDBUF_SIZE);
286
287         dprintf(fd,
288                 "%s\n"
289                 "%s\n"
290                 "%i\n"
291                 "%i\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n",
295                 context->syslog_identifier ? context->syslog_identifier : ident,
296                 unit_id,
297                 context->syslog_priority,
298                 !!context->syslog_level_prefix,
299                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
300                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
301                 is_terminal_output(output));
302
303         if (fd != nfd) {
304                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
305                 safe_close(fd);
306         } else
307                 r = nfd;
308
309         return r;
310 }
/* Opens the terminal at 'path' with the given open flags (plus O_NOCTTY)
 * and makes it available as descriptor nfd. Returns nfd on success, or a
 * negative value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already the descriptor we were asked for? */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        safe_close(tty_fd);
        return r;
}
328
329 static bool is_terminal_input(ExecInput i) {
330         return
331                 i == EXEC_INPUT_TTY ||
332                 i == EXEC_INPUT_TTY_FORCE ||
333                 i == EXEC_INPUT_TTY_FAIL;
334 }
335
336 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
337
338         if (is_terminal_input(std_input) && !apply_tty_stdin)
339                 return EXEC_INPUT_NULL;
340
341         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
342                 return EXEC_INPUT_NULL;
343
344         return std_input;
345 }
346
347 static int fixup_output(ExecOutput std_output, int socket_fd) {
348
349         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
350                 return EXEC_OUTPUT_INHERIT;
351
352         return std_output;
353 }
354
355 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
356         ExecInput i;
357
358         assert(context);
359
360         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
361
362         switch (i) {
363
364         case EXEC_INPUT_NULL:
365                 return open_null_as(O_RDONLY, STDIN_FILENO);
366
367         case EXEC_INPUT_TTY:
368         case EXEC_INPUT_TTY_FORCE:
369         case EXEC_INPUT_TTY_FAIL: {
370                 int fd, r;
371
372                 fd = acquire_terminal(tty_path(context),
373                                       i == EXEC_INPUT_TTY_FAIL,
374                                       i == EXEC_INPUT_TTY_FORCE,
375                                       false,
376                                       USEC_INFINITY);
377                 if (fd < 0)
378                         return fd;
379
380                 if (fd != STDIN_FILENO) {
381                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
382                         safe_close(fd);
383                 } else
384                         r = STDIN_FILENO;
385
386                 return r;
387         }
388
389         case EXEC_INPUT_SOCKET:
390                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
391
392         default:
393                 assert_not_reached("Unknown input type");
394         }
395 }
396
397 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
398         ExecOutput o;
399         ExecInput i;
400         int r;
401
402         assert(context);
403         assert(ident);
404
405         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
406         o = fixup_output(context->std_output, socket_fd);
407
408         if (fileno == STDERR_FILENO) {
409                 ExecOutput e;
410                 e = fixup_output(context->std_error, socket_fd);
411
412                 /* This expects the input and output are already set up */
413
414                 /* Don't change the stderr file descriptor if we inherit all
415                  * the way and are not on a tty */
416                 if (e == EXEC_OUTPUT_INHERIT &&
417                     o == EXEC_OUTPUT_INHERIT &&
418                     i == EXEC_INPUT_NULL &&
419                     !is_terminal_input(context->std_input) &&
420                     getppid () != 1)
421                         return fileno;
422
423                 /* Duplicate from stdout if possible */
424                 if (e == o || e == EXEC_OUTPUT_INHERIT)
425                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
426
427                 o = e;
428
429         } else if (o == EXEC_OUTPUT_INHERIT) {
430                 /* If input got downgraded, inherit the original value */
431                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
432                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
433
434                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
435                 if (i != EXEC_INPUT_NULL)
436                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
437
438                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
439                 if (getppid() != 1)
440                         return fileno;
441
442                 /* We need to open /dev/null here anew, to get the right access mode. */
443                 return open_null_as(O_WRONLY, fileno);
444         }
445
446         switch (o) {
447
448         case EXEC_OUTPUT_NULL:
449                 return open_null_as(O_WRONLY, fileno);
450
451         case EXEC_OUTPUT_TTY:
452                 if (is_terminal_input(i))
453                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
454
455                 /* We don't reset the terminal if this is just about output */
456                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
457
458         case EXEC_OUTPUT_SYSLOG:
459         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
460         case EXEC_OUTPUT_KMSG:
461         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
462         case EXEC_OUTPUT_JOURNAL:
463         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
464                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
465                 if (r < 0) {
466                         log_unit_struct(unit_id,
467                                         LOG_ERR,
468                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
469                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
470                                                     unit_id, strerror(-r)),
471                                         LOG_ERRNO(-r),
472                                         NULL);
473                         r = open_null_as(O_WRONLY, fileno);
474                 }
475                 return r;
476
477         case EXEC_OUTPUT_SOCKET:
478                 assert(socket_fd >= 0);
479                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481         default:
482                 assert_not_reached("Unknown error type");
483         }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487         struct stat st;
488
489         assert(fd >= 0);
490
491         /* This might fail. What matters are the results. */
492         (void) fchown(fd, uid, -1);
493         (void) fchmod(fd, TTY_MODE);
494
495         if (fstat(fd, &st) < 0)
496                 return -errno;
497
498         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499                 return -EPERM;
500
501         return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505                                int *_saved_stdout) {
506         int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508         assert(_saved_stdin);
509         assert(_saved_stdout);
510
511         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512         if (saved_stdin < 0)
513                 return -errno;
514
515         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516         if (saved_stdout < 0) {
517                 r = errno;
518                 goto fail;
519         }
520
521         fd = acquire_terminal(
522                         "/dev/console",
523                         false,
524                         false,
525                         false,
526                         DEFAULT_CONFIRM_USEC);
527         if (fd < 0) {
528                 r = fd;
529                 goto fail;
530         }
531
532         r = chown_terminal(fd, getuid());
533         if (r < 0)
534                 goto fail;
535
536         if (dup2(fd, STDIN_FILENO) < 0) {
537                 r = -errno;
538                 goto fail;
539         }
540
541         if (dup2(fd, STDOUT_FILENO) < 0) {
542                 r = -errno;
543                 goto fail;
544         }
545
546         if (fd >= 2)
547                 safe_close(fd);
548
549         *_saved_stdin = saved_stdin;
550         *_saved_stdout = saved_stdout;
551
552         return 0;
553
554 fail:
555         safe_close(saved_stdout);
556         safe_close(saved_stdin);
557         safe_close(fd);
558
559         return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563         _cleanup_close_ int fd = -1;
564         va_list ap;
565
566         assert(format);
567
568         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569         if (fd < 0)
570                 return fd;
571
572         va_start(ap, format);
573         vdprintf(fd, format, ap);
574         va_end(ap);
575
576         return 0;
577 }
578
/* Releases the terminal and puts the saved descriptors back onto stdin and
 * stdout; descriptors that are negative are skipped. Both saved
 * descriptors are closed afterwards. Returns 0, or the negative errno of
 * the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604         int saved_stdout = -1, saved_stdin = -1, r;
605         _cleanup_free_ char *line = NULL;
606
607         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608         if (r < 0)
609                 return r;
610
611         line = exec_command_line(argv);
612         if (!line)
613                 return -ENOMEM;
614
615         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617         restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619         return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623         bool keep_groups = false;
624         int r;
625
626         assert(context);
627
628         /* Lookup and set GID and supplementary group list. Here too
629          * we avoid NSS lookups for gid=0. */
630
631         if (context->group || username) {
632
633                 if (context->group) {
634                         const char *g = context->group;
635
636                         if ((r = get_group_creds(&g, &gid)) < 0)
637                                 return r;
638                 }
639
640                 /* First step, initialize groups from /etc/groups */
641                 if (username && gid != 0) {
642                         if (initgroups(username, gid) < 0)
643                                 return -errno;
644
645                         keep_groups = true;
646                 }
647
648                 /* Second step, set our gids */
649                 if (setresgid(gid, gid, gid) < 0)
650                         return -errno;
651         }
652
653         if (context->supplementary_groups) {
654                 int ngroups_max, k;
655                 gid_t *gids;
656                 char **i;
657
658                 /* Final step, initialize any manually set supplementary groups */
659                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
660
661                 if (!(gids = new(gid_t, ngroups_max)))
662                         return -ENOMEM;
663
664                 if (keep_groups) {
665                         if ((k = getgroups(ngroups_max, gids)) < 0) {
666                                 free(gids);
667                                 return -errno;
668                         }
669                 } else
670                         k = 0;
671
672                 STRV_FOREACH(i, context->supplementary_groups) {
673                         const char *g;
674
675                         if (k >= ngroups_max) {
676                                 free(gids);
677                                 return -E2BIG;
678                         }
679
680                         g = *i;
681                         r = get_group_creds(&g, gids+k);
682                         if (r < 0) {
683                                 free(gids);
684                                 return r;
685                         }
686
687                         k++;
688                 }
689
690                 if (setgroups(k, gids) < 0) {
691                         free(gids);
692                         return -errno;
693                 }
694
695                 free(gids);
696         }
697
698         return 0;
699 }
700
701 static int enforce_user(const ExecContext *context, uid_t uid) {
702         assert(context);
703
704         /* Sets (but doesn't lookup) the uid and make sure we keep the
705          * capabilities while doing so. */
706
707         if (context->capabilities) {
708                 _cleanup_cap_free_ cap_t d = NULL;
709                 static const cap_value_t bits[] = {
710                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
711                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
712                 };
713
714                 /* First step: If we need to keep capabilities but
715                  * drop privileges we need to make sure we keep our
716                  * caps, while we drop privileges. */
717                 if (uid != 0) {
718                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
719
720                         if (prctl(PR_GET_SECUREBITS) != sb)
721                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
722                                         return -errno;
723                 }
724
725                 /* Second step: set the capabilities. This will reduce
726                  * the capabilities to the minimum we need. */
727
728                 d = cap_dup(context->capabilities);
729                 if (!d)
730                         return -errno;
731
732                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
733                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
734                         return -errno;
735
736                 if (cap_set_proc(d) < 0)
737                         return -errno;
738         }
739
740         /* Third step: actually set the uids */
741         if (setresuid(uid, uid, uid) < 0)
742                 return -errno;
743
744         /* At this point we should have all necessary capabilities but
745            are otherwise a normal user. However, the caps might got
746            corrupted due to the setresuid() so we need clean them up
747            later. This is done outside of this call. */
748
749         return 0;
750 }
751
752 #ifdef HAVE_PAM
753
754 static int null_conv(
755                 int num_msg,
756                 const struct pam_message **msg,
757                 struct pam_response **resp,
758                 void *appdata_ptr) {
759
760         /* We don't support conversations */
761
762         return PAM_CONV_ERR;
763 }
764
765 static int setup_pam(
766                 const char *name,
767                 const char *user,
768                 uid_t uid,
769                 const char *tty,
770                 char ***pam_env,
771                 int fds[], unsigned n_fds) {
772
773         static const struct pam_conv conv = {
774                 .conv = null_conv,
775                 .appdata_ptr = NULL
776         };
777
778         pam_handle_t *handle = NULL;
779         sigset_t ss, old_ss;
780         int pam_code = PAM_SUCCESS;
781         int err;
782         char **e = NULL;
783         bool close_session = false;
784         pid_t pam_pid = 0, parent_pid;
785         int flags = 0;
786
787         assert(name);
788         assert(user);
789         assert(pam_env);
790
791         /* We set up PAM in the parent process, then fork. The child
792          * will then stay around until killed via PR_GET_PDEATHSIG or
793          * systemd via the cgroup logic. It will then remove the PAM
794          * session again. The parent process will exec() the actual
795          * daemon. We do things this way to ensure that the main PID
796          * of the daemon is the one we initially fork()ed. */
797
798         if (log_get_max_level() < LOG_DEBUG)
799                 flags |= PAM_SILENT;
800
801         pam_code = pam_start(name, user, &conv, &handle);
802         if (pam_code != PAM_SUCCESS) {
803                 handle = NULL;
804                 goto fail;
805         }
806
807         if (tty) {
808                 pam_code = pam_set_item(handle, PAM_TTY, tty);
809                 if (pam_code != PAM_SUCCESS)
810                         goto fail;
811         }
812
813         pam_code = pam_acct_mgmt(handle, flags);
814         if (pam_code != PAM_SUCCESS)
815                 goto fail;
816
817         pam_code = pam_open_session(handle, flags);
818         if (pam_code != PAM_SUCCESS)
819                 goto fail;
820
821         close_session = true;
822
823         e = pam_getenvlist(handle);
824         if (!e) {
825                 pam_code = PAM_BUF_ERR;
826                 goto fail;
827         }
828
829         /* Block SIGTERM, so that we know that it won't get lost in
830          * the child */
831         if (sigemptyset(&ss) < 0 ||
832             sigaddset(&ss, SIGTERM) < 0 ||
833             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
834                 goto fail;
835
836         parent_pid = getpid();
837
838         pam_pid = fork();
839         if (pam_pid < 0)
840                 goto fail;
841
842         if (pam_pid == 0) {
843                 int sig;
844                 int r = EXIT_PAM;
845
846                 /* The child's job is to reset the PAM session on
847                  * termination */
848
849                 /* This string must fit in 10 chars (i.e. the length
850                  * of "/sbin/init"), to look pretty in /bin/ps */
851                 rename_process("(sd-pam)");
852
853                 /* Make sure we don't keep open the passed fds in this
854                 child. We assume that otherwise only those fds are
855                 open here that have been opened by PAM. */
856                 close_many(fds, n_fds);
857
858                 /* Drop privileges - we don't need any to pam_close_session
859                  * and this will make PR_SET_PDEATHSIG work in most cases.
860                  * If this fails, ignore the error - but expect sd-pam threads
861                  * to fail to exit normally */
862                 if (setresuid(uid, uid, uid) < 0)
863                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
864
865                 /* Wait until our parent died. This will only work if
866                  * the above setresuid() succeeds, otherwise the kernel
867                  * will not allow unprivileged parents kill their privileged
868                  * children this way. We rely on the control groups kill logic
869                  * to do the rest for us. */
870                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
871                         goto child_finish;
872
873                 /* Check if our parent process might already have
874                  * died? */
875                 if (getppid() == parent_pid) {
876                         for (;;) {
877                                 if (sigwait(&ss, &sig) < 0) {
878                                         if (errno == EINTR)
879                                                 continue;
880
881                                         goto child_finish;
882                                 }
883
884                                 assert(sig == SIGTERM);
885                                 break;
886                         }
887                 }
888
889                 /* If our parent died we'll end the session */
890                 if (getppid() != parent_pid) {
891                         pam_code = pam_close_session(handle, flags);
892                         if (pam_code != PAM_SUCCESS)
893                                 goto child_finish;
894                 }
895
896                 r = 0;
897
898         child_finish:
899                 pam_end(handle, pam_code | flags);
900                 _exit(r);
901         }
902
903         /* If the child was forked off successfully it will do all the
904          * cleanups, so forget about the handle here. */
905         handle = NULL;
906
907         /* Unblock SIGTERM again in the parent */
908         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
909                 goto fail;
910
911         /* We close the log explicitly here, since the PAM modules
912          * might have opened it, but we don't want this fd around. */
913         closelog();
914
915         *pam_env = e;
916         e = NULL;
917
918         return 0;
919
920 fail:
921         if (pam_code != PAM_SUCCESS) {
922                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
923                 err = -EPERM;  /* PAM errors do not map to errno */
924         } else {
925                 log_error_errno(errno, "PAM failed: %m");
926                 err = -errno;
927         }
928
929         if (handle) {
930                 if (close_session)
931                         pam_code = pam_close_session(handle, flags);
932
933                 pam_end(handle, pam_code | flags);
934         }
935
936         strv_free(e);
937
938         closelog();
939
940         if (pam_pid > 1) {
941                 kill(pam_pid, SIGTERM);
942                 kill(pam_pid, SIGCONT);
943         }
944
945         return err;
946 }
947 #endif
948
/* Renames the process to "(<name>)", where <name> is the last path
 * component of 'path', cut down to its final 8 characters if longer. An
 * empty component yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *tail;
        size_t len;

        /* The result has to fit into 10 characters (the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Keep the end of the name: it is usually the more
                 * interesting part, since the beginning might just be
                 * "systemd-" */
                tail += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, tail, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
979
980 #ifdef HAVE_SECCOMP
981
982 static int apply_seccomp(const ExecContext *c) {
983         uint32_t negative_action, action;
984         scmp_filter_ctx *seccomp;
985         Iterator i;
986         void *id;
987         int r;
988
989         assert(c);
990
991         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
992
993         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
994         if (!seccomp)
995                 return -ENOMEM;
996
997         if (c->syscall_archs) {
998
999                 SET_FOREACH(id, c->syscall_archs, i) {
1000                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1001                         if (r == -EEXIST)
1002                                 continue;
1003                         if (r < 0)
1004                                 goto finish;
1005                 }
1006
1007         } else {
1008                 r = seccomp_add_secondary_archs(seccomp);
1009                 if (r < 0)
1010                         goto finish;
1011         }
1012
1013         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1014         SET_FOREACH(id, c->syscall_filter, i) {
1015                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1016                 if (r < 0)
1017                         goto finish;
1018         }
1019
1020         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1021         if (r < 0)
1022                 goto finish;
1023
1024         r = seccomp_load(seccomp);
1025
1026 finish:
1027         seccomp_release(seccomp);
1028         return r;
1029 }
1030
1031 static int apply_address_families(const ExecContext *c) {
1032         scmp_filter_ctx *seccomp;
1033         Iterator i;
1034         int r;
1035
1036         assert(c);
1037
1038         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1039         if (!seccomp)
1040                 return -ENOMEM;
1041
1042         r = seccomp_add_secondary_archs(seccomp);
1043         if (r < 0)
1044                 goto finish;
1045
1046         if (c->address_families_whitelist) {
1047                 int af, first = 0, last = 0;
1048                 void *afp;
1049
1050                 /* If this is a whitelist, we first block the address
1051                  * families that are out of range and then everything
1052                  * that is not in the set. First, we find the lowest
1053                  * and highest address family in the set. */
1054
1055                 SET_FOREACH(afp, c->address_families, i) {
1056                         af = PTR_TO_INT(afp);
1057
1058                         if (af <= 0 || af >= af_max())
1059                                 continue;
1060
1061                         if (first == 0 || af < first)
1062                                 first = af;
1063
1064                         if (last == 0 || af > last)
1065                                 last = af;
1066                 }
1067
1068                 assert((first == 0) == (last == 0));
1069
1070                 if (first == 0) {
1071
1072                         /* No entries in the valid range, block everything */
1073                         r = seccomp_rule_add(
1074                                         seccomp,
1075                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076                                         SCMP_SYS(socket),
1077                                         0);
1078                         if (r < 0)
1079                                 goto finish;
1080
1081                 } else {
1082
1083                         /* Block everything below the first entry */
1084                         r = seccomp_rule_add(
1085                                         seccomp,
1086                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1087                                         SCMP_SYS(socket),
1088                                         1,
1089                                         SCMP_A0(SCMP_CMP_LT, first));
1090                         if (r < 0)
1091                                 goto finish;
1092
1093                         /* Block everything above the last entry */
1094                         r = seccomp_rule_add(
1095                                         seccomp,
1096                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097                                         SCMP_SYS(socket),
1098                                         1,
1099                                         SCMP_A0(SCMP_CMP_GT, last));
1100                         if (r < 0)
1101                                 goto finish;
1102
1103                         /* Block everything between the first and last
1104                          * entry */
1105                         for (af = 1; af < af_max(); af++) {
1106
1107                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1108                                         continue;
1109
1110                                 r = seccomp_rule_add(
1111                                                 seccomp,
1112                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1113                                                 SCMP_SYS(socket),
1114                                                 1,
1115                                                 SCMP_A0(SCMP_CMP_EQ, af));
1116                                 if (r < 0)
1117                                         goto finish;
1118                         }
1119                 }
1120
1121         } else {
1122                 void *af;
1123
1124                 /* If this is a blacklist, then generate one rule for
1125                  * each address family that are then combined in OR
1126                  * checks. */
1127
1128                 SET_FOREACH(af, c->address_families, i) {
1129
1130                         r = seccomp_rule_add(
1131                                         seccomp,
1132                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1133                                         SCMP_SYS(socket),
1134                                         1,
1135                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1136                         if (r < 0)
1137                                 goto finish;
1138                 }
1139         }
1140
1141         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1142         if (r < 0)
1143                 goto finish;
1144
1145         r = seccomp_load(seccomp);
1146
1147 finish:
1148         seccomp_release(seccomp);
1149         return r;
1150 }
1151
1152 #endif
1153
1154 static void do_idle_pipe_dance(int idle_pipe[4]) {
1155         assert(idle_pipe);
1156
1157
1158         safe_close(idle_pipe[1]);
1159         safe_close(idle_pipe[2]);
1160
1161         if (idle_pipe[0] >= 0) {
1162                 int r;
1163
1164                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1165
1166                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1167                         /* Signal systemd that we are bored and want to continue. */
1168                         write(idle_pipe[3], "x", 1);
1169
1170                         /* Wait for systemd to react to the signal above. */
1171                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1172                 }
1173
1174                 safe_close(idle_pipe[0]);
1175
1176         }
1177
1178         safe_close(idle_pipe[3]);
1179 }
1180
1181 static int build_environment(
1182                 const ExecContext *c,
1183                 unsigned n_fds,
1184                 usec_t watchdog_usec,
1185                 const char *home,
1186                 const char *username,
1187                 const char *shell,
1188                 char ***ret) {
1189
1190         _cleanup_strv_free_ char **our_env = NULL;
1191         unsigned n_env = 0;
1192         char *x;
1193
1194         assert(c);
1195         assert(ret);
1196
1197         our_env = new0(char*, 10);
1198         if (!our_env)
1199                 return -ENOMEM;
1200
1201         if (n_fds > 0) {
1202                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205
1206                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209         }
1210
1211         if (watchdog_usec > 0) {
1212                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215
1216                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         if (home) {
1222                 x = strappend("HOME=", home);
1223                 if (!x)
1224                         return -ENOMEM;
1225                 our_env[n_env++] = x;
1226         }
1227
1228         if (username) {
1229                 x = strappend("LOGNAME=", username);
1230                 if (!x)
1231                         return -ENOMEM;
1232                 our_env[n_env++] = x;
1233
1234                 x = strappend("USER=", username);
1235                 if (!x)
1236                         return -ENOMEM;
1237                 our_env[n_env++] = x;
1238         }
1239
1240         if (shell) {
1241                 x = strappend("SHELL=", shell);
1242                 if (!x)
1243                         return -ENOMEM;
1244                 our_env[n_env++] = x;
1245         }
1246
1247         if (is_terminal_input(c->std_input) ||
1248             c->std_output == EXEC_OUTPUT_TTY ||
1249             c->std_error == EXEC_OUTPUT_TTY ||
1250             c->tty_path) {
1251
1252                 x = strdup(default_term_for_tty(tty_path(c)));
1253                 if (!x)
1254                         return -ENOMEM;
1255                 our_env[n_env++] = x;
1256         }
1257
1258         our_env[n_env++] = NULL;
1259         assert(n_env <= 10);
1260
1261         *ret = our_env;
1262         our_env = NULL;
1263
1264         return 0;
1265 }
1266
1267 static int exec_child(
1268                 ExecCommand *command,
1269                 const ExecContext *context,
1270                 const ExecParameters *params,
1271                 ExecRuntime *runtime,
1272                 char **argv,
1273                 int socket_fd,
1274                 int *fds, unsigned n_fds,
1275                 char **files_env,
1276                 int *exit_status) {
1277
1278         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1279         _cleanup_free_ char *mac_selinux_context_net = NULL;
1280         const char *username = NULL, *home = NULL, *shell = NULL;
1281         unsigned n_dont_close = 0;
1282         int dont_close[n_fds + 4];
1283         uid_t uid = UID_INVALID;
1284         gid_t gid = GID_INVALID;
1285         int i, r;
1286
1287         assert(command);
1288         assert(context);
1289         assert(params);
1290         assert(exit_status);
1291
1292         rename_process_from_path(command->path);
1293
1294         /* We reset exactly these signals, since they are the
1295          * only ones we set to SIG_IGN in the main daemon. All
1296          * others we leave untouched because we set them to
1297          * SIG_DFL or a valid handler initially, both of which
1298          * will be demoted to SIG_DFL. */
1299         default_signals(SIGNALS_CRASH_HANDLER,
1300                         SIGNALS_IGNORE, -1);
1301
1302         if (context->ignore_sigpipe)
1303                 ignore_signals(SIGPIPE, -1);
1304
1305         r = reset_signal_mask();
1306         if (r < 0) {
1307                 *exit_status = EXIT_SIGNAL_MASK;
1308                 return r;
1309         }
1310
1311         if (params->idle_pipe)
1312                 do_idle_pipe_dance(params->idle_pipe);
1313
1314         /* Close sockets very early to make sure we don't
1315          * block init reexecution because it cannot bind its
1316          * sockets */
1317
1318         log_forget_fds();
1319
1320         if (socket_fd >= 0)
1321                 dont_close[n_dont_close++] = socket_fd;
1322         if (n_fds > 0) {
1323                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324                 n_dont_close += n_fds;
1325         }
1326         if (params->bus_endpoint_fd >= 0)
1327                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1328         if (runtime) {
1329                 if (runtime->netns_storage_socket[0] >= 0)
1330                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331                 if (runtime->netns_storage_socket[1] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1333         }
1334
1335         r = close_all_fds(dont_close, n_dont_close);
1336         if (r < 0) {
1337                 *exit_status = EXIT_FDS;
1338                 return r;
1339         }
1340
1341         if (!context->same_pgrp)
1342                 if (setsid() < 0) {
1343                         *exit_status = EXIT_SETSID;
1344                         return -errno;
1345                 }
1346
1347         exec_context_tty_reset(context);
1348
1349         if (params->confirm_spawn) {
1350                 char response;
1351
1352                 r = ask_for_confirmation(&response, argv);
1353                 if (r == -ETIMEDOUT)
1354                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1355                 else if (r < 0)
1356                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1357                 else if (response == 's') {
1358                         write_confirm_message("Skipping execution.\n");
1359                         *exit_status = EXIT_CONFIRM;
1360                         return -ECANCELED;
1361                 } else if (response == 'n') {
1362                         write_confirm_message("Failing execution.\n");
1363                         *exit_status = 0;
1364                         return 0;
1365                 }
1366         }
1367
1368         if (context->user) {
1369                 username = context->user;
1370                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                 if (r < 0) {
1372                         *exit_status = EXIT_USER;
1373                         return r;
1374                 }
1375         }
1376
1377         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378          * must be sure to drop O_NONBLOCK */
1379         if (socket_fd >= 0)
1380                 fd_nonblock(socket_fd, false);
1381
1382         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1383         if (r < 0) {
1384                 *exit_status = EXIT_STDIN;
1385                 return r;
1386         }
1387
1388         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (r < 0) {
1390                 *exit_status = EXIT_STDOUT;
1391                 return r;
1392         }
1393
1394         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1395         if (r < 0) {
1396                 *exit_status = EXIT_STDERR;
1397                 return r;
1398         }
1399
1400         if (params->cgroup_path) {
1401                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1402                 if (r < 0) {
1403                         *exit_status = EXIT_CGROUP;
1404                         return r;
1405                 }
1406         }
1407
1408         if (context->oom_score_adjust_set) {
1409                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1410
1411                 /* When we can't make this change due to EPERM, then
1412                  * let's silently skip over it. User namespaces
1413                  * prohibit write access to this file, and we
1414                  * shouldn't trip up over that. */
1415
1416                 sprintf(t, "%i", context->oom_score_adjust);
1417                 r = write_string_file("/proc/self/oom_score_adj", t);
1418                 if (r == -EPERM || r == -EACCES) {
1419                         log_open();
1420                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1421                         log_close();
1422                 } else if (r < 0) {
1423                         *exit_status = EXIT_OOM_ADJUST;
1424                         return -errno;
1425                 }
1426         }
1427
1428         if (context->nice_set)
1429                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1430                         *exit_status = EXIT_NICE;
1431                         return -errno;
1432                 }
1433
1434         if (context->cpu_sched_set) {
1435                 struct sched_param param = {
1436                         .sched_priority = context->cpu_sched_priority,
1437                 };
1438
1439                 r = sched_setscheduler(0,
1440                                        context->cpu_sched_policy |
1441                                        (context->cpu_sched_reset_on_fork ?
1442                                         SCHED_RESET_ON_FORK : 0),
1443                                        &param);
1444                 if (r < 0) {
1445                         *exit_status = EXIT_SETSCHEDULER;
1446                         return -errno;
1447                 }
1448         }
1449
1450         if (context->cpuset)
1451                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1452                         *exit_status = EXIT_CPUAFFINITY;
1453                         return -errno;
1454                 }
1455
1456         if (context->ioprio_set)
1457                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1458                         *exit_status = EXIT_IOPRIO;
1459                         return -errno;
1460                 }
1461
1462         if (context->timer_slack_nsec != NSEC_INFINITY)
1463                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1464                         *exit_status = EXIT_TIMERSLACK;
1465                         return -errno;
1466                 }
1467
1468         if (context->personality != 0xffffffffUL)
1469                 if (personality(context->personality) < 0) {
1470                         *exit_status = EXIT_PERSONALITY;
1471                         return -errno;
1472                 }
1473
1474         if (context->utmp_id)
1475                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1476
1477         if (context->user && is_terminal_input(context->std_input)) {
1478                 r = chown_terminal(STDIN_FILENO, uid);
1479                 if (r < 0) {
1480                         *exit_status = EXIT_STDIN;
1481                         return r;
1482                 }
1483         }
1484
1485 #ifdef ENABLE_KDBUS
1486         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1487                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1488
1489                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1490                 if (r < 0) {
1491                         *exit_status = EXIT_BUS_ENDPOINT;
1492                         return r;
1493                 }
1494         }
1495 #endif
1496
1497         /* If delegation is enabled we'll pass ownership of the cgroup
1498          * (but only in systemd's own controller hierarchy!) to the
1499          * user of the new process. */
1500         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1501                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1502                 if (r < 0) {
1503                         *exit_status = EXIT_CGROUP;
1504                         return r;
1505                 }
1506
1507
1508                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1509                 if (r < 0) {
1510                         *exit_status = EXIT_CGROUP;
1511                         return r;
1512                 }
1513         }
1514
1515         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1516                 char **rt;
1517
1518                 STRV_FOREACH(rt, context->runtime_directory) {
1519                         _cleanup_free_ char *p;
1520
1521                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1522                         if (!p) {
1523                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1524                                 return -ENOMEM;
1525                         }
1526
1527                         r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1528                         if (r < 0) {
1529                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1530                                 return r;
1531                         }
1532                 }
1533         }
1534
1535         if (params->apply_permissions) {
1536                 r = enforce_groups(context, username, gid);
1537                 if (r < 0) {
1538                         *exit_status = EXIT_GROUP;
1539                         return r;
1540                 }
1541         }
1542
1543         umask(context->umask);
1544
1545 #ifdef HAVE_PAM
1546         if (params->apply_permissions && context->pam_name && username) {
1547                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1548                 if (r < 0) {
1549                         *exit_status = EXIT_PAM;
1550                         return r;
1551                 }
1552         }
1553 #endif
1554
1555         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1556                 r = setup_netns(runtime->netns_storage_socket);
1557                 if (r < 0) {
1558                         *exit_status = EXIT_NETWORK;
1559                         return r;
1560                 }
1561         }
1562
1563         if (!strv_isempty(context->read_write_dirs) ||
1564             !strv_isempty(context->read_only_dirs) ||
1565             !strv_isempty(context->inaccessible_dirs) ||
1566             context->mount_flags != 0 ||
1567             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1568             params->bus_endpoint_path ||
1569             context->private_devices ||
1570             context->protect_system != PROTECT_SYSTEM_NO ||
1571             context->protect_home != PROTECT_HOME_NO) {
1572
1573                 char *tmp = NULL, *var = NULL;
1574
1575                 /* The runtime struct only contains the parent
1576                  * of the private /tmp, which is
1577                  * non-accessible to world users. Inside of it
1578                  * there's a /tmp that is sticky, and that's
1579                  * the one we want to use here. */
1580
1581                 if (context->private_tmp && runtime) {
1582                         if (runtime->tmp_dir)
1583                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1584                         if (runtime->var_tmp_dir)
1585                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1586                 }
1587
1588                 r = setup_namespace(
1589                                 context->read_write_dirs,
1590                                 context->read_only_dirs,
1591                                 context->inaccessible_dirs,
1592                                 tmp,
1593                                 var,
1594                                 params->bus_endpoint_path,
1595                                 context->private_devices,
1596                                 context->protect_home,
1597                                 context->protect_system,
1598                                 context->mount_flags);
1599
1600                 /* If we couldn't set up the namespace this is
1601                  * probably due to a missing capability. In this case,
1602                  * silently proceed. */
1603                 if (r == -EPERM || r == -EACCES) {
1604                         log_open();
1605                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1606                         log_close();
1607                 } else if (r < 0) {
1608                         *exit_status = EXIT_NAMESPACE;
1609                         return r;
1610                 }
1611         }
1612
1613         if (params->apply_chroot) {
1614                 if (context->root_directory)
1615                         if (chroot(context->root_directory) < 0) {
1616                                 *exit_status = EXIT_CHROOT;
1617                                 return -errno;
1618                         }
1619
1620                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1621                         *exit_status = EXIT_CHDIR;
1622                         return -errno;
1623                 }
1624         } else {
1625                 _cleanup_free_ char *d = NULL;
1626
1627                 if (asprintf(&d, "%s/%s",
1628                              context->root_directory ? context->root_directory : "",
1629                              context->working_directory ? context->working_directory : "") < 0) {
1630                         *exit_status = EXIT_MEMORY;
1631                         return -ENOMEM;
1632                 }
1633
1634                 if (chdir(d) < 0) {
1635                         *exit_status = EXIT_CHDIR;
1636                         return -errno;
1637                 }
1638         }
1639
1640 #ifdef HAVE_SELINUX
1641         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1642                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1643                 if (r < 0) {
1644                         *exit_status = EXIT_SELINUX_CONTEXT;
1645                         return r;
1646                 }
1647         }
1648 #endif
1649
1650         /* We repeat the fd closing here, to make sure that
1651          * nothing is leaked from the PAM modules. Note that
1652          * we are more aggressive this time since socket_fd
1653          * and the netns fds we don't need anymore. The custom
1654          * endpoint fd was needed to upload the policy and can
1655          * now be closed as well. */
1656         r = close_all_fds(fds, n_fds);
1657         if (r >= 0)
1658                 r = shift_fds(fds, n_fds);
1659         if (r >= 0)
1660                 r = flags_fds(fds, n_fds, context->non_blocking);
1661         if (r < 0) {
1662                 *exit_status = EXIT_FDS;
1663                 return r;
1664         }
1665
1666         if (params->apply_permissions) {
1667
1668                 for (i = 0; i < _RLIMIT_MAX; i++) {
1669                         if (!context->rlimit[i])
1670                                 continue;
1671
1672                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1673                                 *exit_status = EXIT_LIMITS;
1674                                 return -errno;
1675                         }
1676                 }
1677
1678                 if (context->capability_bounding_set_drop) {
1679                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1680                         if (r < 0) {
1681                                 *exit_status = EXIT_CAPABILITIES;
1682                                 return r;
1683                         }
1684                 }
1685
1686 #ifdef HAVE_SMACK
1687                 if (context->smack_process_label) {
1688                         r = mac_smack_apply_pid(0, context->smack_process_label);
1689                         if (r < 0) {
1690                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1691                                 return r;
1692                         }
1693                 }
1694 #endif
1695
1696                 if (context->user) {
1697                         r = enforce_user(context, uid);
1698                         if (r < 0) {
1699                                 *exit_status = EXIT_USER;
1700                                 return r;
1701                         }
1702                 }
1703
1704                 /* PR_GET_SECUREBITS is not privileged, while
1705                  * PR_SET_SECUREBITS is. So to suppress
1706                  * potential EPERMs we'll try not to call
1707                  * PR_SET_SECUREBITS unless necessary. */
1708                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1709                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1710                                 *exit_status = EXIT_SECUREBITS;
1711                                 return -errno;
1712                         }
1713
1714                 if (context->capabilities)
1715                         if (cap_set_proc(context->capabilities) < 0) {
1716                                 *exit_status = EXIT_CAPABILITIES;
1717                                 return -errno;
1718                         }
1719
1720                 if (context->no_new_privileges)
1721                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1722                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1723                                 return -errno;
1724                         }
1725
1726 #ifdef HAVE_SECCOMP
1727                 if (context->address_families_whitelist ||
1728                     !set_isempty(context->address_families)) {
1729                         r = apply_address_families(context);
1730                         if (r < 0) {
1731                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1732                                 return r;
1733                         }
1734                 }
1735
1736                 if (context->syscall_whitelist ||
1737                     !set_isempty(context->syscall_filter) ||
1738                     !set_isempty(context->syscall_archs)) {
1739                         r = apply_seccomp(context);
1740                         if (r < 0) {
1741                                 *exit_status = EXIT_SECCOMP;
1742                                 return r;
1743                         }
1744                 }
1745 #endif
1746
1747 #ifdef HAVE_SELINUX
1748                 if (mac_selinux_use()) {
1749                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1750
1751                         if (exec_context) {
1752                                 r = setexeccon(exec_context);
1753                                 if (r < 0) {
1754                                         *exit_status = EXIT_SELINUX_CONTEXT;
1755                                         return r;
1756                                 }
1757                         }
1758                 }
1759 #endif
1760
1761 #ifdef HAVE_APPARMOR
1762                 if (context->apparmor_profile && mac_apparmor_use()) {
1763                         r = aa_change_onexec(context->apparmor_profile);
1764                         if (r < 0 && !context->apparmor_profile_ignore) {
1765                                 *exit_status = EXIT_APPARMOR_PROFILE;
1766                                 return -errno;
1767                         }
1768                 }
1769 #endif
1770         }
1771
1772         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1773         if (r < 0) {
1774                 *exit_status = EXIT_MEMORY;
1775                 return r;
1776         }
1777
1778         final_env = strv_env_merge(5,
1779                                    params->environment,
1780                                    our_env,
1781                                    context->environment,
1782                                    files_env,
1783                                    pam_env,
1784                                    NULL);
1785         if (!final_env) {
1786                 *exit_status = EXIT_MEMORY;
1787                 return -ENOMEM;
1788         }
1789
1790         final_argv = replace_env_argv(argv, final_env);
1791         if (!final_argv) {
1792                 *exit_status = EXIT_MEMORY;
1793                 return -ENOMEM;
1794         }
1795
1796         final_env = strv_env_clean(final_env);
1797
1798         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1799                 _cleanup_free_ char *line;
1800
1801                 line = exec_command_line(final_argv);
1802                 if (line) {
1803                         log_open();
1804                         log_unit_struct(params->unit_id,
1805                                         LOG_DEBUG,
1806                                         "EXECUTABLE=%s", command->path,
1807                                         LOG_MESSAGE("Executing: %s", line),
1808                                         NULL);
1809                         log_close();
1810                 }
1811         }
1812         execve(command->path, final_argv, final_env);
1813         *exit_status = EXIT_EXEC;
1814         return -errno;
1815 }
1816
1817 int exec_spawn(ExecCommand *command,
1818                const ExecContext *context,
1819                const ExecParameters *params,
1820                ExecRuntime *runtime,
1821                pid_t *ret) {
1822
1823         _cleanup_strv_free_ char **files_env = NULL;
1824         int *fds = NULL; unsigned n_fds = 0;
1825         _cleanup_free_ char *line = NULL;
1826         int socket_fd, r;
1827         char **argv;
1828         pid_t pid;
1829
1830         assert(command);
1831         assert(context);
1832         assert(ret);
1833         assert(params);
1834         assert(params->fds || params->n_fds <= 0);
1835
1836         if (context->std_input == EXEC_INPUT_SOCKET ||
1837             context->std_output == EXEC_OUTPUT_SOCKET ||
1838             context->std_error == EXEC_OUTPUT_SOCKET) {
1839
1840                 if (params->n_fds != 1) {
1841                         log_unit_error(params->unit_id, "Got more than one socket.");
1842                         return -EINVAL;
1843                 }
1844
1845                 socket_fd = params->fds[0];
1846         } else {
1847                 socket_fd = -1;
1848                 fds = params->fds;
1849                 n_fds = params->n_fds;
1850         }
1851
1852         r = exec_context_load_environment(context, params->unit_id, &files_env);
1853         if (r < 0)
1854                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1855
1856         argv = params->argv ?: command->argv;
1857         line = exec_command_line(argv);
1858         if (!line)
1859                 return log_oom();
1860
1861         log_unit_struct(params->unit_id,
1862                         LOG_DEBUG,
1863                         "EXECUTABLE=%s", command->path,
1864                         LOG_MESSAGE("About to execute: %s", line),
1865                         NULL);
1866         pid = fork();
1867         if (pid < 0)
1868                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1869
1870         if (pid == 0) {
1871                 int exit_status;
1872
1873                 r = exec_child(command,
1874                                context,
1875                                params,
1876                                runtime,
1877                                argv,
1878                                socket_fd,
1879                                fds, n_fds,
1880                                files_env,
1881                                &exit_status);
1882                 if (r < 0) {
1883                         log_open();
1884                         log_unit_struct(params->unit_id,
1885                                         LOG_ERR,
1886                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1887                                         "EXECUTABLE=%s", command->path,
1888                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1889                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1890                                                     command->path, strerror(-r)),
1891                                         LOG_ERRNO(r),
1892                                         NULL);
1893                 }
1894
1895                 _exit(exit_status);
1896         }
1897
1898         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1899
1900         /* We add the new process to the cgroup both in the child (so
1901          * that we can be sure that no user code is ever executed
1902          * outside of the cgroup) and in the parent (so that we can be
1903          * sure that when we kill the cgroup the process will be
1904          * killed too). */
1905         if (params->cgroup_path)
1906                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1907
1908         exec_status_start(&command->exec_status, pid);
1909
1910         *ret = pid;
1911         return 0;
1912 }
1913
1914 void exec_context_init(ExecContext *c) {
1915         assert(c);
1916
1917         c->umask = 0022;
1918         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1919         c->cpu_sched_policy = SCHED_OTHER;
1920         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1921         c->syslog_level_prefix = true;
1922         c->ignore_sigpipe = true;
1923         c->timer_slack_nsec = NSEC_INFINITY;
1924         c->personality = 0xffffffffUL;
1925         c->runtime_directory_mode = 0755;
1926 }
1927
1928 void exec_context_done(ExecContext *c) {
1929         unsigned l;
1930
1931         assert(c);
1932
1933         strv_free(c->environment);
1934         c->environment = NULL;
1935
1936         strv_free(c->environment_files);
1937         c->environment_files = NULL;
1938
1939         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1940                 free(c->rlimit[l]);
1941                 c->rlimit[l] = NULL;
1942         }
1943
1944         free(c->working_directory);
1945         c->working_directory = NULL;
1946         free(c->root_directory);
1947         c->root_directory = NULL;
1948
1949         free(c->tty_path);
1950         c->tty_path = NULL;
1951
1952         free(c->syslog_identifier);
1953         c->syslog_identifier = NULL;
1954
1955         free(c->user);
1956         c->user = NULL;
1957
1958         free(c->group);
1959         c->group = NULL;
1960
1961         strv_free(c->supplementary_groups);
1962         c->supplementary_groups = NULL;
1963
1964         free(c->pam_name);
1965         c->pam_name = NULL;
1966
1967         if (c->capabilities) {
1968                 cap_free(c->capabilities);
1969                 c->capabilities = NULL;
1970         }
1971
1972         strv_free(c->read_only_dirs);
1973         c->read_only_dirs = NULL;
1974
1975         strv_free(c->read_write_dirs);
1976         c->read_write_dirs = NULL;
1977
1978         strv_free(c->inaccessible_dirs);
1979         c->inaccessible_dirs = NULL;
1980
1981         if (c->cpuset)
1982                 CPU_FREE(c->cpuset);
1983
1984         free(c->utmp_id);
1985         c->utmp_id = NULL;
1986
1987         free(c->selinux_context);
1988         c->selinux_context = NULL;
1989
1990         free(c->apparmor_profile);
1991         c->apparmor_profile = NULL;
1992
1993         set_free(c->syscall_filter);
1994         c->syscall_filter = NULL;
1995
1996         set_free(c->syscall_archs);
1997         c->syscall_archs = NULL;
1998
1999         set_free(c->address_families);
2000         c->address_families = NULL;
2001
2002         strv_free(c->runtime_directory);
2003         c->runtime_directory = NULL;
2004
2005         bus_endpoint_free(c->bus_endpoint);
2006         c->bus_endpoint = NULL;
2007 }
2008
2009 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2010         char **i;
2011
2012         assert(c);
2013
2014         if (!runtime_prefix)
2015                 return 0;
2016
2017         STRV_FOREACH(i, c->runtime_directory) {
2018                 _cleanup_free_ char *p;
2019
2020                 p = strjoin(runtime_prefix, "/", *i, NULL);
2021                 if (!p)
2022                         return -ENOMEM;
2023
2024                 /* We execute this synchronously, since we need to be
2025                  * sure this is gone when we start the service
2026                  * next. */
2027                 rm_rf(p, false, true, false);
2028         }
2029
2030         return 0;
2031 }
2032
2033 void exec_command_done(ExecCommand *c) {
2034         assert(c);
2035
2036         free(c->path);
2037         c->path = NULL;
2038
2039         strv_free(c->argv);
2040         c->argv = NULL;
2041 }
2042
2043 void exec_command_done_array(ExecCommand *c, unsigned n) {
2044         unsigned i;
2045
2046         for (i = 0; i < n; i++)
2047                 exec_command_done(c+i);
2048 }
2049
2050 ExecCommand* exec_command_free_list(ExecCommand *c) {
2051         ExecCommand *i;
2052
2053         while ((i = c)) {
2054                 LIST_REMOVE(command, c, i);
2055                 exec_command_done(i);
2056                 free(i);
2057         }
2058
2059         return NULL;
2060 }
2061
2062 void exec_command_free_array(ExecCommand **c, unsigned n) {
2063         unsigned i;
2064
2065         for (i = 0; i < n; i++)
2066                 c[i] = exec_command_free_list(c[i]);
2067 }
2068
/* Context handed to invalid_env() as userdata while cleaning up the
 * contents of an environment file. */
typedef struct InvalidEnvInfo {
        /* Unit the log message is attributed to */
        const char *unit_id;

        /* Path of the environment file the assignment was read from */
        const char *path;
} InvalidEnvInfo;
2073
2074 static void invalid_env(const char *p, void *userdata) {
2075         InvalidEnvInfo *info = userdata;
2076
2077         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2078 }
2079
2080 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2081         char **i, **r = NULL;
2082
2083         assert(c);
2084         assert(l);
2085
2086         STRV_FOREACH(i, c->environment_files) {
2087                 char *fn;
2088                 int k;
2089                 bool ignore = false;
2090                 char **p;
2091                 _cleanup_globfree_ glob_t pglob = {};
2092                 int count, n;
2093
2094                 fn = *i;
2095
2096                 if (fn[0] == '-') {
2097                         ignore = true;
2098                         fn ++;
2099                 }
2100
2101                 if (!path_is_absolute(fn)) {
2102                         if (ignore)
2103                                 continue;
2104
2105                         strv_free(r);
2106                         return -EINVAL;
2107                 }
2108
2109                 /* Filename supports globbing, take all matching files */
2110                 errno = 0;
2111                 if (glob(fn, 0, NULL, &pglob) != 0) {
2112                         if (ignore)
2113                                 continue;
2114
2115                         strv_free(r);
2116                         return errno ? -errno : -EINVAL;
2117                 }
2118                 count = pglob.gl_pathc;
2119                 if (count == 0) {
2120                         if (ignore)
2121                                 continue;
2122
2123                         strv_free(r);
2124                         return -EINVAL;
2125                 }
2126                 for (n = 0; n < count; n++) {
2127                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2128                         if (k < 0) {
2129                                 if (ignore)
2130                                         continue;
2131
2132                                 strv_free(r);
2133                                 return k;
2134                         }
2135                         /* Log invalid environment variables with filename */
2136                         if (p) {
2137                                 InvalidEnvInfo info = {
2138                                         .unit_id = unit_id,
2139                                         .path = pglob.gl_pathv[n]
2140                                 };
2141
2142                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2143                         }
2144
2145                         if (r == NULL)
2146                                 r = p;
2147                         else {
2148                                 char **m;
2149
2150                                 m = strv_env_merge(2, r, p);
2151                                 strv_free(r);
2152                                 strv_free(p);
2153                                 if (!m)
2154                                         return -ENOMEM;
2155
2156                                 r = m;
2157                         }
2158                 }
2159         }
2160
2161         *l = r;
2162
2163         return 0;
2164 }
2165
2166 static bool tty_may_match_dev_console(const char *tty) {
2167         _cleanup_free_ char *active = NULL;
2168        char *console;
2169
2170         if (startswith(tty, "/dev/"))
2171                 tty += 5;
2172
2173         /* trivial identity? */
2174         if (streq(tty, "console"))
2175                 return true;
2176
2177         console = resolve_dev_console(&active);
2178         /* if we could not resolve, assume it may */
2179         if (!console)
2180                 return true;
2181
2182         /* "tty0" means the active VC, so it may be the same sometimes */
2183         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2184 }
2185
2186 bool exec_context_may_touch_console(ExecContext *ec) {
2187         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2188                 is_terminal_input(ec->std_input) ||
2189                 is_terminal_output(ec->std_output) ||
2190                 is_terminal_output(ec->std_error)) &&
2191                tty_may_match_dev_console(tty_path(ec));
2192 }
2193
2194 static void strv_fprintf(FILE *f, char **l) {
2195         char **g;
2196
2197         assert(f);
2198
2199         STRV_FOREACH(g, l)
2200                 fprintf(f, " %s", *g);
2201 }
2202
2203 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2204         char **e;
2205         unsigned i;
2206
2207         assert(c);
2208         assert(f);
2209
2210         prefix = strempty(prefix);
2211
2212         fprintf(f,
2213                 "%sUMask: %04o\n"
2214                 "%sWorkingDirectory: %s\n"
2215                 "%sRootDirectory: %s\n"
2216                 "%sNonBlocking: %s\n"
2217                 "%sPrivateTmp: %s\n"
2218                 "%sPrivateNetwork: %s\n"
2219                 "%sPrivateDevices: %s\n"
2220                 "%sProtectHome: %s\n"
2221                 "%sProtectSystem: %s\n"
2222                 "%sIgnoreSIGPIPE: %s\n",
2223                 prefix, c->umask,
2224                 prefix, c->working_directory ? c->working_directory : "/",
2225                 prefix, c->root_directory ? c->root_directory : "/",
2226                 prefix, yes_no(c->non_blocking),
2227                 prefix, yes_no(c->private_tmp),
2228                 prefix, yes_no(c->private_network),
2229                 prefix, yes_no(c->private_devices),
2230                 prefix, protect_home_to_string(c->protect_home),
2231                 prefix, protect_system_to_string(c->protect_system),
2232                 prefix, yes_no(c->ignore_sigpipe));
2233
2234         STRV_FOREACH(e, c->environment)
2235                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2236
2237         STRV_FOREACH(e, c->environment_files)
2238                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2239
2240         if (c->nice_set)
2241                 fprintf(f,
2242                         "%sNice: %i\n",
2243                         prefix, c->nice);
2244
2245         if (c->oom_score_adjust_set)
2246                 fprintf(f,
2247                         "%sOOMScoreAdjust: %i\n",
2248                         prefix, c->oom_score_adjust);
2249
2250         for (i = 0; i < RLIM_NLIMITS; i++)
2251                 if (c->rlimit[i])
2252                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2253                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2254
2255         if (c->ioprio_set) {
2256                 _cleanup_free_ char *class_str = NULL;
2257
2258                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2259                 fprintf(f,
2260                         "%sIOSchedulingClass: %s\n"
2261                         "%sIOPriority: %i\n",
2262                         prefix, strna(class_str),
2263                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2264         }
2265
2266         if (c->cpu_sched_set) {
2267                 _cleanup_free_ char *policy_str = NULL;
2268
2269                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2270                 fprintf(f,
2271                         "%sCPUSchedulingPolicy: %s\n"
2272                         "%sCPUSchedulingPriority: %i\n"
2273                         "%sCPUSchedulingResetOnFork: %s\n",
2274                         prefix, strna(policy_str),
2275                         prefix, c->cpu_sched_priority,
2276                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2277         }
2278
2279         if (c->cpuset) {
2280                 fprintf(f, "%sCPUAffinity:", prefix);
2281                 for (i = 0; i < c->cpuset_ncpus; i++)
2282                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2283                                 fprintf(f, " %u", i);
2284                 fputs("\n", f);
2285         }
2286
2287         if (c->timer_slack_nsec != NSEC_INFINITY)
2288                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2289
2290         fprintf(f,
2291                 "%sStandardInput: %s\n"
2292                 "%sStandardOutput: %s\n"
2293                 "%sStandardError: %s\n",
2294                 prefix, exec_input_to_string(c->std_input),
2295                 prefix, exec_output_to_string(c->std_output),
2296                 prefix, exec_output_to_string(c->std_error));
2297
2298         if (c->tty_path)
2299                 fprintf(f,
2300                         "%sTTYPath: %s\n"
2301                         "%sTTYReset: %s\n"
2302                         "%sTTYVHangup: %s\n"
2303                         "%sTTYVTDisallocate: %s\n",
2304                         prefix, c->tty_path,
2305                         prefix, yes_no(c->tty_reset),
2306                         prefix, yes_no(c->tty_vhangup),
2307                         prefix, yes_no(c->tty_vt_disallocate));
2308
2309         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2310             c->std_output == EXEC_OUTPUT_KMSG ||
2311             c->std_output == EXEC_OUTPUT_JOURNAL ||
2312             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2313             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2314             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2315             c->std_error == EXEC_OUTPUT_SYSLOG ||
2316             c->std_error == EXEC_OUTPUT_KMSG ||
2317             c->std_error == EXEC_OUTPUT_JOURNAL ||
2318             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2319             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2320             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2321
2322                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2323
2324                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2325                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2326
2327                 fprintf(f,
2328                         "%sSyslogFacility: %s\n"
2329                         "%sSyslogLevel: %s\n",
2330                         prefix, strna(fac_str),
2331                         prefix, strna(lvl_str));
2332         }
2333
2334         if (c->capabilities) {
2335                 _cleanup_cap_free_charp_ char *t;
2336
2337                 t = cap_to_text(c->capabilities, NULL);
2338                 if (t)
2339                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2340         }
2341
2342         if (c->secure_bits)
2343                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2344                         prefix,
2345                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2346                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2347                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2348                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2349                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2350                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2351
2352         if (c->capability_bounding_set_drop) {
2353                 unsigned long l;
2354                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2355
2356                 for (l = 0; l <= cap_last_cap(); l++)
2357                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2358                                 fprintf(f, " %s", strna(capability_to_name(l)));
2359
2360                 fputs("\n", f);
2361         }
2362
2363         if (c->user)
2364                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2365         if (c->group)
2366                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2367
2368         if (strv_length(c->supplementary_groups) > 0) {
2369                 fprintf(f, "%sSupplementaryGroups:", prefix);
2370                 strv_fprintf(f, c->supplementary_groups);
2371                 fputs("\n", f);
2372         }
2373
2374         if (c->pam_name)
2375                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2376
2377         if (strv_length(c->read_write_dirs) > 0) {
2378                 fprintf(f, "%sReadWriteDirs:", prefix);
2379                 strv_fprintf(f, c->read_write_dirs);
2380                 fputs("\n", f);
2381         }
2382
2383         if (strv_length(c->read_only_dirs) > 0) {
2384                 fprintf(f, "%sReadOnlyDirs:", prefix);
2385                 strv_fprintf(f, c->read_only_dirs);
2386                 fputs("\n", f);
2387         }
2388
2389         if (strv_length(c->inaccessible_dirs) > 0) {
2390                 fprintf(f, "%sInaccessibleDirs:", prefix);
2391                 strv_fprintf(f, c->inaccessible_dirs);
2392                 fputs("\n", f);
2393         }
2394
2395         if (c->utmp_id)
2396                 fprintf(f,
2397                         "%sUtmpIdentifier: %s\n",
2398                         prefix, c->utmp_id);
2399
2400         if (c->selinux_context)
2401                 fprintf(f,
2402                         "%sSELinuxContext: %s%s\n",
2403                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2404
2405         if (c->personality != 0xffffffffUL)
2406                 fprintf(f,
2407                         "%sPersonality: %s\n",
2408                         prefix, strna(personality_to_string(c->personality)));
2409
2410         if (c->syscall_filter) {
2411 #ifdef HAVE_SECCOMP
2412                 Iterator j;
2413                 void *id;
2414                 bool first = true;
2415 #endif
2416
2417                 fprintf(f,
2418                         "%sSystemCallFilter: ",
2419                         prefix);
2420
2421                 if (!c->syscall_whitelist)
2422                         fputc('~', f);
2423
2424 #ifdef HAVE_SECCOMP
2425                 SET_FOREACH(id, c->syscall_filter, j) {
2426                         _cleanup_free_ char *name = NULL;
2427
2428                         if (first)
2429                                 first = false;
2430                         else
2431                                 fputc(' ', f);
2432
2433                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2434                         fputs(strna(name), f);
2435                 }
2436 #endif
2437
2438                 fputc('\n', f);
2439         }
2440
2441         if (c->syscall_archs) {
2442 #ifdef HAVE_SECCOMP
2443                 Iterator j;
2444                 void *id;
2445 #endif
2446
2447                 fprintf(f,
2448                         "%sSystemCallArchitectures:",
2449                         prefix);
2450
2451 #ifdef HAVE_SECCOMP
2452                 SET_FOREACH(id, c->syscall_archs, j)
2453                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2454 #endif
2455                 fputc('\n', f);
2456         }
2457
2458         if (c->syscall_errno != 0)
2459                 fprintf(f,
2460                         "%sSystemCallErrorNumber: %s\n",
2461                         prefix, strna(errno_to_name(c->syscall_errno)));
2462
2463         if (c->apparmor_profile)
2464                 fprintf(f,
2465                         "%sAppArmorProfile: %s%s\n",
2466                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2467 }
2468
2469 bool exec_context_maintains_privileges(ExecContext *c) {
2470         assert(c);
2471
2472         /* Returns true if the process forked off would run run under
2473          * an unchanged UID or as root. */
2474
2475         if (!c->user)
2476                 return true;
2477
2478         if (streq(c->user, "root") || streq(c->user, "0"))
2479                 return true;
2480
2481         return false;
2482 }
2483
2484 void exec_status_start(ExecStatus *s, pid_t pid) {
2485         assert(s);
2486
2487         zero(*s);
2488         s->pid = pid;
2489         dual_timestamp_get(&s->start_timestamp);
2490 }
2491
2492 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2493         assert(s);
2494
2495         if (s->pid && s->pid != pid)
2496                 zero(*s);
2497
2498         s->pid = pid;
2499         dual_timestamp_get(&s->exit_timestamp);
2500
2501         s->code = code;
2502         s->status = status;
2503
2504         if (context) {
2505                 if (context->utmp_id)
2506                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2507
2508                 exec_context_tty_reset(context);
2509         }
2510 }
2511
2512 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2513         char buf[FORMAT_TIMESTAMP_MAX];
2514
2515         assert(s);
2516         assert(f);
2517
2518         if (s->pid <= 0)
2519                 return;
2520
2521         prefix = strempty(prefix);
2522
2523         fprintf(f,
2524                 "%sPID: "PID_FMT"\n",
2525                 prefix, s->pid);
2526
2527         if (s->start_timestamp.realtime > 0)
2528                 fprintf(f,
2529                         "%sStart Timestamp: %s\n",
2530                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2531
2532         if (s->exit_timestamp.realtime > 0)
2533                 fprintf(f,
2534                         "%sExit Timestamp: %s\n"
2535                         "%sExit Code: %s\n"
2536                         "%sExit Status: %i\n",
2537                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2538                         prefix, sigchld_code_to_string(s->code),
2539                         prefix, s->status);
2540 }
2541
2542 char *exec_command_line(char **argv) {
2543         size_t k;
2544         char *n, *p, **a;
2545         bool first = true;
2546
2547         assert(argv);
2548
2549         k = 1;
2550         STRV_FOREACH(a, argv)
2551                 k += strlen(*a)+3;
2552
2553         if (!(n = new(char, k)))
2554                 return NULL;
2555
2556         p = n;
2557         STRV_FOREACH(a, argv) {
2558
2559                 if (!first)
2560                         *(p++) = ' ';
2561                 else
2562                         first = false;
2563
2564                 if (strpbrk(*a, WHITESPACE)) {
2565                         *(p++) = '\'';
2566                         p = stpcpy(p, *a);
2567                         *(p++) = '\'';
2568                 } else
2569                         p = stpcpy(p, *a);
2570
2571         }
2572
2573         *p = 0;
2574
2575         /* FIXME: this doesn't really handle arguments that have
2576          * spaces and ticks in them */
2577
2578         return n;
2579 }
2580
2581 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2582         _cleanup_free_ char *cmd = NULL;
2583         const char *prefix2;
2584
2585         assert(c);
2586         assert(f);
2587
2588         prefix = strempty(prefix);
2589         prefix2 = strjoina(prefix, "\t");
2590
2591         cmd = exec_command_line(c->argv);
2592         fprintf(f,
2593                 "%sCommand Line: %s\n",
2594                 prefix, cmd ? cmd : strerror(ENOMEM));
2595
2596         exec_status_dump(&c->exec_status, f, prefix2);
2597 }
2598
2599 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2600         assert(f);
2601
2602         prefix = strempty(prefix);
2603
2604         LIST_FOREACH(command, c, c)
2605                 exec_command_dump(c, f, prefix);
2606 }
2607
2608 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2609         ExecCommand *end;
2610
2611         assert(l);
2612         assert(e);
2613
2614         if (*l) {
2615                 /* It's kind of important, that we keep the order here */
2616                 LIST_FIND_TAIL(command, *l, end);
2617                 LIST_INSERT_AFTER(command, *l, end, e);
2618         } else
2619               *l = e;
2620 }
2621
2622 int exec_command_set(ExecCommand *c, const char *path, ...) {
2623         va_list ap;
2624         char **l, *p;
2625
2626         assert(c);
2627         assert(path);
2628
2629         va_start(ap, path);
2630         l = strv_new_ap(path, ap);
2631         va_end(ap);
2632
2633         if (!l)
2634                 return -ENOMEM;
2635
2636         p = strdup(path);
2637         if (!p) {
2638                 strv_free(l);
2639                 return -ENOMEM;
2640         }
2641
2642         free(c->path);
2643         c->path = p;
2644
2645         strv_free(c->argv);
2646         c->argv = l;
2647
2648         return 0;
2649 }
2650
2651 int exec_command_append(ExecCommand *c, const char *path, ...) {
2652         _cleanup_strv_free_ char **l = NULL;
2653         va_list ap;
2654         int r;
2655
2656         assert(c);
2657         assert(path);
2658
2659         va_start(ap, path);
2660         l = strv_new_ap(path, ap);
2661         va_end(ap);
2662
2663         if (!l)
2664                 return -ENOMEM;
2665
2666         r = strv_extend_strv(&c->argv, l);
2667         if (r < 0)
2668                 return r;
2669
2670         return 0;
2671 }
2672
2673
2674 static int exec_runtime_allocate(ExecRuntime **rt) {
2675
2676         if (*rt)
2677                 return 0;
2678
2679         *rt = new0(ExecRuntime, 1);
2680         if (!*rt)
2681                 return -ENOMEM;
2682
2683         (*rt)->n_ref = 1;
2684         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2685
2686         return 0;
2687 }
2688
2689 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2690         int r;
2691
2692         assert(rt);
2693         assert(c);
2694         assert(id);
2695
2696         if (*rt)
2697                 return 1;
2698
2699         if (!c->private_network && !c->private_tmp)
2700                 return 0;
2701
2702         r = exec_runtime_allocate(rt);
2703         if (r < 0)
2704                 return r;
2705
2706         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2707                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2708                         return -errno;
2709         }
2710
2711         if (c->private_tmp && !(*rt)->tmp_dir) {
2712                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2713                 if (r < 0)
2714                         return r;
2715         }
2716
2717         return 1;
2718 }
2719
2720 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2721         assert(r);
2722         assert(r->n_ref > 0);
2723
2724         r->n_ref++;
2725         return r;
2726 }
2727
2728 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2729
2730         if (!r)
2731                 return NULL;
2732
2733         assert(r->n_ref > 0);
2734
2735         r->n_ref--;
2736         if (r->n_ref <= 0) {
2737                 free(r->tmp_dir);
2738                 free(r->var_tmp_dir);
2739                 safe_close_pair(r->netns_storage_socket);
2740                 free(r);
2741         }
2742
2743         return NULL;
2744 }
2745
2746 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2747         assert(u);
2748         assert(f);
2749         assert(fds);
2750
2751         if (!rt)
2752                 return 0;
2753
2754         if (rt->tmp_dir)
2755                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2756
2757         if (rt->var_tmp_dir)
2758                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2759
2760         if (rt->netns_storage_socket[0] >= 0) {
2761                 int copy;
2762
2763                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2764                 if (copy < 0)
2765                         return copy;
2766
2767                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2768         }
2769
2770         if (rt->netns_storage_socket[1] >= 0) {
2771                 int copy;
2772
2773                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2774                 if (copy < 0)
2775                         return copy;
2776
2777                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2778         }
2779
2780         return 0;
2781 }
2782
2783 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2784         int r;
2785
2786         assert(rt);
2787         assert(key);
2788         assert(value);
2789
2790         if (streq(key, "tmp-dir")) {
2791                 char *copy;
2792
2793                 r = exec_runtime_allocate(rt);
2794                 if (r < 0)
2795                         return r;
2796
2797                 copy = strdup(value);
2798                 if (!copy)
2799                         return log_oom();
2800
2801                 free((*rt)->tmp_dir);
2802                 (*rt)->tmp_dir = copy;
2803
2804         } else if (streq(key, "var-tmp-dir")) {
2805                 char *copy;
2806
2807                 r = exec_runtime_allocate(rt);
2808                 if (r < 0)
2809                         return r;
2810
2811                 copy = strdup(value);
2812                 if (!copy)
2813                         return log_oom();
2814
2815                 free((*rt)->var_tmp_dir);
2816                 (*rt)->var_tmp_dir = copy;
2817
2818         } else if (streq(key, "netns-socket-0")) {
2819                 int fd;
2820
2821                 r = exec_runtime_allocate(rt);
2822                 if (r < 0)
2823                         return r;
2824
2825                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2826                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2827                 else {
2828                         safe_close((*rt)->netns_storage_socket[0]);
2829                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2830                 }
2831         } else if (streq(key, "netns-socket-1")) {
2832                 int fd;
2833
2834                 r = exec_runtime_allocate(rt);
2835                 if (r < 0)
2836                         return r;
2837
2838                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2839                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2840                 else {
2841                         safe_close((*rt)->netns_storage_socket[1]);
2842                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2843                 }
2844         } else
2845                 return 0;
2846
2847         return 1;
2848 }
2849
/* Job function handed to asynchronous_job(): runs rm_rf_dangerous() on
 * the path string passed in p and then frees that string, i.e. it
 * takes ownership of p. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2856
2857 void exec_runtime_destroy(ExecRuntime *rt) {
2858         int r;
2859
2860         if (!rt)
2861                 return;
2862
2863         /* If there are multiple users of this, let's leave the stuff around */
2864         if (rt->n_ref > 1)
2865                 return;
2866
2867         if (rt->tmp_dir) {
2868                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2869
2870                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2871                 if (r < 0) {
2872                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2873                         free(rt->tmp_dir);
2874                 }
2875
2876                 rt->tmp_dir = NULL;
2877         }
2878
2879         if (rt->var_tmp_dir) {
2880                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2881
2882                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2883                 if (r < 0) {
2884                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2885                         free(rt->var_tmp_dir);
2886                 }
2887
2888                 rt->var_tmp_dir = NULL;
2889         }
2890
2891         safe_close_pair(rt->netns_storage_socket);
2892 }
2893
2894 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2895         [EXEC_INPUT_NULL] = "null",
2896         [EXEC_INPUT_TTY] = "tty",
2897         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2898         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2899         [EXEC_INPUT_SOCKET] = "socket"
2900 };
2901
2902 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2903
2904 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2905         [EXEC_OUTPUT_INHERIT] = "inherit",
2906         [EXEC_OUTPUT_NULL] = "null",
2907         [EXEC_OUTPUT_TTY] = "tty",
2908         [EXEC_OUTPUT_SYSLOG] = "syslog",
2909         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2910         [EXEC_OUTPUT_KMSG] = "kmsg",
2911         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2912         [EXEC_OUTPUT_JOURNAL] = "journal",
2913         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2914         [EXEC_OUTPUT_SOCKET] = "socket"
2915 };
2916
2917 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);