chiark / gitweb /
unit: use weaker dependencies between mount and device units in --user mode
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "execute.h"
53 #include "strv.h"
54 #include "macro.h"
55 #include "capability.h"
56 #include "util.h"
57 #include "log.h"
58 #include "sd-messages.h"
59 #include "ioprio.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
63 #include "missing.h"
64 #include "utmp-wtmp.h"
65 #include "def.h"
66 #include "path-util.h"
67 #include "env-util.h"
68 #include "fileio.h"
69 #include "unit.h"
70 #include "async.h"
71 #include "selinux-util.h"
72 #include "errno-list.h"
73 #include "af-list.h"
74 #include "mkdir.h"
75 #include "smack-util.h"
76 #include "bus-endpoint.h"
77 #include "cap-list.h"
78
79 #ifdef HAVE_APPARMOR
80 #include "apparmor-util.h"
81 #endif
82
83 #ifdef HAVE_SECCOMP
84 #include "seccomp-util.h"
85 #endif
86
/* Idle timeouts, expressed as multiples of USEC_PER_SEC.
 * NOTE(review): the users of these two constants are not in the part of
 * the file examined here -- confirm their exact purpose at the call sites. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode that chown_terminal() applies to a terminal.
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested through fd_inc_sndbuf() for the logger socket */
#define SNDBUF_SIZE (8 * 1024 * 1024)
94
/* Moves the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. packed directly after stdin/stdout/stderr.
 *
 * The array is modified in place: every entry is replaced by the new
 * descriptor number and the old descriptor is closed.
 *
 * Returns 0 on success, or a negative errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Each pass starts at the first slot that could not be placed in
         * the previous pass; we are done once a pass placed everything. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD hands out the lowest free fd >= want. If
                         * that is not the one we asked for, the slot is
                         * still occupied; remember the first such index
                         * and retry from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
138
/* Adjusts the flags of all passed file descriptors: O_NONBLOCK is set
 * or cleared according to 'nonblock', and FD_CLOEXEC is always cleared.
 *
 * Returns 0 on success, or the negative error code of the first
 * fd_nonblock()/fd_cloexec() call that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int q;

                q = fd_nonblock(fds[k], nonblock);
                if (q < 0)
                        return q;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be passed on to our children. */
                q = fd_cloexec(fds[k], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
165
166 _pure_ static const char *tty_path(const ExecContext *context) {
167         assert(context);
168
169         if (context->tty_path)
170                 return context->tty_path;
171
172         return "/dev/console";
173 }
174
175 static void exec_context_tty_reset(const ExecContext *context) {
176         assert(context);
177
178         if (context->tty_vhangup)
179                 terminal_vhangup(tty_path(context));
180
181         if (context->tty_reset)
182                 reset_terminal(tty_path(context));
183
184         if (context->tty_vt_disallocate && context->tty_path)
185                 vt_disallocate(context->tty_path);
186 }
187
188 static bool is_terminal_output(ExecOutput o) {
189         return
190                 o == EXEC_OUTPUT_TTY ||
191                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
192                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
193                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
194 }
195
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and
 * installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted descriptor */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        /* The temporary descriptor is not needed either way */
        safe_close(null_fd);

        return ret;
}
213
214 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
215         union sockaddr_union sa = {
216                 .un.sun_family = AF_UNIX,
217                 .un.sun_path = "/run/systemd/journal/stdout",
218         };
219         uid_t olduid = UID_INVALID;
220         gid_t oldgid = GID_INVALID;
221         int r;
222
223         if (gid != GID_INVALID) {
224                 oldgid = getgid();
225
226                 r = setegid(gid);
227                 if (r < 0)
228                         return -errno;
229         }
230
231         if (uid != UID_INVALID) {
232                 olduid = getuid();
233
234                 r = seteuid(uid);
235                 if (r < 0) {
236                         r = -errno;
237                         goto restore_gid;
238                 }
239         }
240
241         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
242         if (r < 0)
243                 r = -errno;
244
245         /* If we fail to restore the uid or gid, things will likely
246            fail later on. This should only happen if an LSM interferes. */
247
248         if (uid != UID_INVALID)
249                 (void) seteuid(olduid);
250
251  restore_gid:
252         if (gid != GID_INVALID)
253                 (void) setegid(oldgid);
254
255         return r;
256 }
257
258 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
259         int fd, r;
260
261         assert(context);
262         assert(output < _EXEC_OUTPUT_MAX);
263         assert(ident);
264         assert(nfd >= 0);
265
266         fd = socket(AF_UNIX, SOCK_STREAM, 0);
267         if (fd < 0)
268                 return -errno;
269
270         r = connect_journal_socket(fd, uid, gid);
271         if (r < 0)
272                 return r;
273
274         if (shutdown(fd, SHUT_RD) < 0) {
275                 safe_close(fd);
276                 return -errno;
277         }
278
279         fd_inc_sndbuf(fd, SNDBUF_SIZE);
280
281         dprintf(fd,
282                 "%s\n"
283                 "%s\n"
284                 "%i\n"
285                 "%i\n"
286                 "%i\n"
287                 "%i\n"
288                 "%i\n",
289                 context->syslog_identifier ? context->syslog_identifier : ident,
290                 unit_id,
291                 context->syslog_priority,
292                 !!context->syslog_level_prefix,
293                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
294                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
295                 is_terminal_output(output));
296
297         if (fd != nfd) {
298                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
299                 safe_close(fd);
300         } else
301                 r = nfd;
302
303         return r;
304 }
/* Opens the terminal at 'path' with the given open mode (plus
 * O_NOCTTY) and installs it as file descriptor 'nfd'.
 *
 * Returns nfd on success, a negative error code on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Lucky case: we got the wanted descriptor right away */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
322
323 static bool is_terminal_input(ExecInput i) {
324         return
325                 i == EXEC_INPUT_TTY ||
326                 i == EXEC_INPUT_TTY_FORCE ||
327                 i == EXEC_INPUT_TTY_FAIL;
328 }
329
330 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
331
332         if (is_terminal_input(std_input) && !apply_tty_stdin)
333                 return EXEC_INPUT_NULL;
334
335         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
336                 return EXEC_INPUT_NULL;
337
338         return std_input;
339 }
340
341 static int fixup_output(ExecOutput std_output, int socket_fd) {
342
343         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
344                 return EXEC_OUTPUT_INHERIT;
345
346         return std_output;
347 }
348
349 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
350         ExecInput i;
351
352         assert(context);
353
354         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
355
356         switch (i) {
357
358         case EXEC_INPUT_NULL:
359                 return open_null_as(O_RDONLY, STDIN_FILENO);
360
361         case EXEC_INPUT_TTY:
362         case EXEC_INPUT_TTY_FORCE:
363         case EXEC_INPUT_TTY_FAIL: {
364                 int fd, r;
365
366                 fd = acquire_terminal(tty_path(context),
367                                       i == EXEC_INPUT_TTY_FAIL,
368                                       i == EXEC_INPUT_TTY_FORCE,
369                                       false,
370                                       USEC_INFINITY);
371                 if (fd < 0)
372                         return fd;
373
374                 if (fd != STDIN_FILENO) {
375                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
376                         safe_close(fd);
377                 } else
378                         r = STDIN_FILENO;
379
380                 return r;
381         }
382
383         case EXEC_INPUT_SOCKET:
384                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
385
386         default:
387                 assert_not_reached("Unknown input type");
388         }
389 }
390
391 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
392         ExecOutput o;
393         ExecInput i;
394         int r;
395
396         assert(context);
397         assert(ident);
398
399         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
400         o = fixup_output(context->std_output, socket_fd);
401
402         if (fileno == STDERR_FILENO) {
403                 ExecOutput e;
404                 e = fixup_output(context->std_error, socket_fd);
405
406                 /* This expects the input and output are already set up */
407
408                 /* Don't change the stderr file descriptor if we inherit all
409                  * the way and are not on a tty */
410                 if (e == EXEC_OUTPUT_INHERIT &&
411                     o == EXEC_OUTPUT_INHERIT &&
412                     i == EXEC_INPUT_NULL &&
413                     !is_terminal_input(context->std_input) &&
414                     getppid () != 1)
415                         return fileno;
416
417                 /* Duplicate from stdout if possible */
418                 if (e == o || e == EXEC_OUTPUT_INHERIT)
419                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
420
421                 o = e;
422
423         } else if (o == EXEC_OUTPUT_INHERIT) {
424                 /* If input got downgraded, inherit the original value */
425                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
426                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
427
428                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
429                 if (i != EXEC_INPUT_NULL)
430                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
431
432                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
433                 if (getppid() != 1)
434                         return fileno;
435
436                 /* We need to open /dev/null here anew, to get the right access mode. */
437                 return open_null_as(O_WRONLY, fileno);
438         }
439
440         switch (o) {
441
442         case EXEC_OUTPUT_NULL:
443                 return open_null_as(O_WRONLY, fileno);
444
445         case EXEC_OUTPUT_TTY:
446                 if (is_terminal_input(i))
447                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
448
449                 /* We don't reset the terminal if this is just about output */
450                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
451
452         case EXEC_OUTPUT_SYSLOG:
453         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
454         case EXEC_OUTPUT_KMSG:
455         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
456         case EXEC_OUTPUT_JOURNAL:
457         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
458                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
459                 if (r < 0) {
460                         log_unit_struct(unit_id,
461                                         LOG_ERR,
462                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
463                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
464                                                     unit_id, strerror(-r)),
465                                         LOG_ERRNO(-r),
466                                         NULL);
467                         r = open_null_as(O_WRONLY, fileno);
468                 }
469                 return r;
470
471         case EXEC_OUTPUT_SOCKET:
472                 assert(socket_fd >= 0);
473                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
474
475         default:
476                 assert_not_reached("Unknown error type");
477         }
478 }
479
480 static int chown_terminal(int fd, uid_t uid) {
481         struct stat st;
482
483         assert(fd >= 0);
484
485         /* This might fail. What matters are the results. */
486         (void) fchown(fd, uid, -1);
487         (void) fchmod(fd, TTY_MODE);
488
489         if (fstat(fd, &st) < 0)
490                 return -errno;
491
492         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
493                 return -EPERM;
494
495         return 0;
496 }
497
498 static int setup_confirm_stdio(int *_saved_stdin,
499                                int *_saved_stdout) {
500         int fd = -1, saved_stdin, saved_stdout = -1, r;
501
502         assert(_saved_stdin);
503         assert(_saved_stdout);
504
505         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
506         if (saved_stdin < 0)
507                 return -errno;
508
509         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
510         if (saved_stdout < 0) {
511                 r = errno;
512                 goto fail;
513         }
514
515         fd = acquire_terminal(
516                         "/dev/console",
517                         false,
518                         false,
519                         false,
520                         DEFAULT_CONFIRM_USEC);
521         if (fd < 0) {
522                 r = fd;
523                 goto fail;
524         }
525
526         r = chown_terminal(fd, getuid());
527         if (r < 0)
528                 goto fail;
529
530         if (dup2(fd, STDIN_FILENO) < 0) {
531                 r = -errno;
532                 goto fail;
533         }
534
535         if (dup2(fd, STDOUT_FILENO) < 0) {
536                 r = -errno;
537                 goto fail;
538         }
539
540         if (fd >= 2)
541                 safe_close(fd);
542
543         *_saved_stdin = saved_stdin;
544         *_saved_stdout = saved_stdout;
545
546         return 0;
547
548 fail:
549         safe_close(saved_stdout);
550         safe_close(saved_stdin);
551         safe_close(fd);
552
553         return r;
554 }
555
556 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
557         _cleanup_close_ int fd = -1;
558         va_list ap;
559
560         assert(format);
561
562         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
563         if (fd < 0)
564                 return fd;
565
566         va_start(ap, format);
567         vdprintf(fd, format, ap);
568         va_end(ap);
569
570         return 0;
571 }
572
/* Counterpart of setup_confirm_stdio(): releases the terminal and puts
 * the saved stdin/stdout back in place. Saved descriptors that are
 * negative are skipped. Both saved descriptors are closed afterwards.
 *
 * Returns 0 on success, or the negative errno of the last dup2() call
 * that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
596
597 static int ask_for_confirmation(char *response, char **argv) {
598         int saved_stdout = -1, saved_stdin = -1, r;
599         _cleanup_free_ char *line = NULL;
600
601         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
602         if (r < 0)
603                 return r;
604
605         line = exec_command_line(argv);
606         if (!line)
607                 return -ENOMEM;
608
609         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
610
611         restore_confirm_stdio(&saved_stdin, &saved_stdout);
612
613         return r;
614 }
615
616 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
617         bool keep_groups = false;
618         int r;
619
620         assert(context);
621
622         /* Lookup and set GID and supplementary group list. Here too
623          * we avoid NSS lookups for gid=0. */
624
625         if (context->group || username) {
626
627                 if (context->group) {
628                         const char *g = context->group;
629
630                         if ((r = get_group_creds(&g, &gid)) < 0)
631                                 return r;
632                 }
633
634                 /* First step, initialize groups from /etc/groups */
635                 if (username && gid != 0) {
636                         if (initgroups(username, gid) < 0)
637                                 return -errno;
638
639                         keep_groups = true;
640                 }
641
642                 /* Second step, set our gids */
643                 if (setresgid(gid, gid, gid) < 0)
644                         return -errno;
645         }
646
647         if (context->supplementary_groups) {
648                 int ngroups_max, k;
649                 gid_t *gids;
650                 char **i;
651
652                 /* Final step, initialize any manually set supplementary groups */
653                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
654
655                 if (!(gids = new(gid_t, ngroups_max)))
656                         return -ENOMEM;
657
658                 if (keep_groups) {
659                         if ((k = getgroups(ngroups_max, gids)) < 0) {
660                                 free(gids);
661                                 return -errno;
662                         }
663                 } else
664                         k = 0;
665
666                 STRV_FOREACH(i, context->supplementary_groups) {
667                         const char *g;
668
669                         if (k >= ngroups_max) {
670                                 free(gids);
671                                 return -E2BIG;
672                         }
673
674                         g = *i;
675                         r = get_group_creds(&g, gids+k);
676                         if (r < 0) {
677                                 free(gids);
678                                 return r;
679                         }
680
681                         k++;
682                 }
683
684                 if (setgroups(k, gids) < 0) {
685                         free(gids);
686                         return -errno;
687                 }
688
689                 free(gids);
690         }
691
692         return 0;
693 }
694
695 static int enforce_user(const ExecContext *context, uid_t uid) {
696         assert(context);
697
698         /* Sets (but doesn't lookup) the uid and make sure we keep the
699          * capabilities while doing so. */
700
701         if (context->capabilities) {
702                 _cleanup_cap_free_ cap_t d = NULL;
703                 static const cap_value_t bits[] = {
704                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
705                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
706                 };
707
708                 /* First step: If we need to keep capabilities but
709                  * drop privileges we need to make sure we keep our
710                  * caps, while we drop privileges. */
711                 if (uid != 0) {
712                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
713
714                         if (prctl(PR_GET_SECUREBITS) != sb)
715                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
716                                         return -errno;
717                 }
718
719                 /* Second step: set the capabilities. This will reduce
720                  * the capabilities to the minimum we need. */
721
722                 d = cap_dup(context->capabilities);
723                 if (!d)
724                         return -errno;
725
726                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
727                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
728                         return -errno;
729
730                 if (cap_set_proc(d) < 0)
731                         return -errno;
732         }
733
734         /* Third step: actually set the uids */
735         if (setresuid(uid, uid, uid) < 0)
736                 return -errno;
737
738         /* At this point we should have all necessary capabilities but
739            are otherwise a normal user. However, the caps might got
740            corrupted due to the setresuid() so we need clean them up
741            later. This is done outside of this call. */
742
743         return 0;
744 }
745
746 #ifdef HAVE_PAM
747
748 static int null_conv(
749                 int num_msg,
750                 const struct pam_message **msg,
751                 struct pam_response **resp,
752                 void *appdata_ptr) {
753
754         /* We don't support conversations */
755
756         return PAM_CONV_ERR;
757 }
758
759 static int setup_pam(
760                 const char *name,
761                 const char *user,
762                 uid_t uid,
763                 const char *tty,
764                 char ***pam_env,
765                 int fds[], unsigned n_fds) {
766
767         static const struct pam_conv conv = {
768                 .conv = null_conv,
769                 .appdata_ptr = NULL
770         };
771
772         pam_handle_t *handle = NULL;
773         sigset_t ss, old_ss;
774         int pam_code = PAM_SUCCESS;
775         int err;
776         char **e = NULL;
777         bool close_session = false;
778         pid_t pam_pid = 0, parent_pid;
779         int flags = 0;
780
781         assert(name);
782         assert(user);
783         assert(pam_env);
784
785         /* We set up PAM in the parent process, then fork. The child
786          * will then stay around until killed via PR_GET_PDEATHSIG or
787          * systemd via the cgroup logic. It will then remove the PAM
788          * session again. The parent process will exec() the actual
789          * daemon. We do things this way to ensure that the main PID
790          * of the daemon is the one we initially fork()ed. */
791
792         if (log_get_max_level() < LOG_DEBUG)
793                 flags |= PAM_SILENT;
794
795         pam_code = pam_start(name, user, &conv, &handle);
796         if (pam_code != PAM_SUCCESS) {
797                 handle = NULL;
798                 goto fail;
799         }
800
801         if (tty) {
802                 pam_code = pam_set_item(handle, PAM_TTY, tty);
803                 if (pam_code != PAM_SUCCESS)
804                         goto fail;
805         }
806
807         pam_code = pam_acct_mgmt(handle, flags);
808         if (pam_code != PAM_SUCCESS)
809                 goto fail;
810
811         pam_code = pam_open_session(handle, flags);
812         if (pam_code != PAM_SUCCESS)
813                 goto fail;
814
815         close_session = true;
816
817         e = pam_getenvlist(handle);
818         if (!e) {
819                 pam_code = PAM_BUF_ERR;
820                 goto fail;
821         }
822
823         /* Block SIGTERM, so that we know that it won't get lost in
824          * the child */
825         if (sigemptyset(&ss) < 0 ||
826             sigaddset(&ss, SIGTERM) < 0 ||
827             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
828                 goto fail;
829
830         parent_pid = getpid();
831
832         pam_pid = fork();
833         if (pam_pid < 0)
834                 goto fail;
835
836         if (pam_pid == 0) {
837                 int sig;
838                 int r = EXIT_PAM;
839
840                 /* The child's job is to reset the PAM session on
841                  * termination */
842
843                 /* This string must fit in 10 chars (i.e. the length
844                  * of "/sbin/init"), to look pretty in /bin/ps */
845                 rename_process("(sd-pam)");
846
847                 /* Make sure we don't keep open the passed fds in this
848                 child. We assume that otherwise only those fds are
849                 open here that have been opened by PAM. */
850                 close_many(fds, n_fds);
851
852                 /* Drop privileges - we don't need any to pam_close_session
853                  * and this will make PR_SET_PDEATHSIG work in most cases.
854                  * If this fails, ignore the error - but expect sd-pam threads
855                  * to fail to exit normally */
856                 if (setresuid(uid, uid, uid) < 0)
857                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
858
859                 /* Wait until our parent died. This will only work if
860                  * the above setresuid() succeeds, otherwise the kernel
861                  * will not allow unprivileged parents kill their privileged
862                  * children this way. We rely on the control groups kill logic
863                  * to do the rest for us. */
864                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
865                         goto child_finish;
866
867                 /* Check if our parent process might already have
868                  * died? */
869                 if (getppid() == parent_pid) {
870                         for (;;) {
871                                 if (sigwait(&ss, &sig) < 0) {
872                                         if (errno == EINTR)
873                                                 continue;
874
875                                         goto child_finish;
876                                 }
877
878                                 assert(sig == SIGTERM);
879                                 break;
880                         }
881                 }
882
883                 /* If our parent died we'll end the session */
884                 if (getppid() != parent_pid) {
885                         pam_code = pam_close_session(handle, flags);
886                         if (pam_code != PAM_SUCCESS)
887                                 goto child_finish;
888                 }
889
890                 r = 0;
891
892         child_finish:
893                 pam_end(handle, pam_code | flags);
894                 _exit(r);
895         }
896
897         /* If the child was forked off successfully it will do all the
898          * cleanups, so forget about the handle here. */
899         handle = NULL;
900
901         /* Unblock SIGTERM again in the parent */
902         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
903                 goto fail;
904
905         /* We close the log explicitly here, since the PAM modules
906          * might have opened it, but we don't want this fd around. */
907         closelog();
908
909         *pam_env = e;
910         e = NULL;
911
912         return 0;
913
914 fail:
915         if (pam_code != PAM_SUCCESS) {
916                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
917                 err = -EPERM;  /* PAM errors do not map to errno */
918         } else {
919                 log_error_errno(errno, "PAM failed: %m");
920                 err = -errno;
921         }
922
923         if (handle) {
924                 if (close_session)
925                         pam_code = pam_close_session(handle, flags);
926
927                 pam_end(handle, pam_code | flags);
928         }
929
930         strv_free(e);
931
932         closelog();
933
934         if (pam_pid > 1) {
935                 kill(pam_pid, SIGTERM);
936                 kill(pam_pid, SIGCONT);
937         }
938
939         return err;
940 }
941 #endif
942
/* Renames the current process after the last component of 'path',
 * wrapped in parentheses. If the component is longer than 8 characters
 * only its last 8 characters are used; if it is empty the name "(...)"
 * is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result must fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */
        char process_name[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting
                 * part, since the beginning might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, name, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
973
974 #ifdef HAVE_SECCOMP
975
976 static int apply_seccomp(const ExecContext *c) {
977         uint32_t negative_action, action;
978         scmp_filter_ctx *seccomp;
979         Iterator i;
980         void *id;
981         int r;
982
983         assert(c);
984
985         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
986
987         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
988         if (!seccomp)
989                 return -ENOMEM;
990
991         if (c->syscall_archs) {
992
993                 SET_FOREACH(id, c->syscall_archs, i) {
994                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
995                         if (r == -EEXIST)
996                                 continue;
997                         if (r < 0)
998                                 goto finish;
999                 }
1000
1001         } else {
1002                 r = seccomp_add_secondary_archs(seccomp);
1003                 if (r < 0)
1004                         goto finish;
1005         }
1006
1007         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1008         SET_FOREACH(id, c->syscall_filter, i) {
1009                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1010                 if (r < 0)
1011                         goto finish;
1012         }
1013
1014         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1015         if (r < 0)
1016                 goto finish;
1017
1018         r = seccomp_load(seccomp);
1019
1020 finish:
1021         seccomp_release(seccomp);
1022         return r;
1023 }
1024
1025 static int apply_address_families(const ExecContext *c) {
1026         scmp_filter_ctx *seccomp;
1027         Iterator i;
1028         int r;
1029
1030         assert(c);
1031
1032         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1033         if (!seccomp)
1034                 return -ENOMEM;
1035
1036         r = seccomp_add_secondary_archs(seccomp);
1037         if (r < 0)
1038                 goto finish;
1039
1040         if (c->address_families_whitelist) {
1041                 int af, first = 0, last = 0;
1042                 void *afp;
1043
1044                 /* If this is a whitelist, we first block the address
1045                  * families that are out of range and then everything
1046                  * that is not in the set. First, we find the lowest
1047                  * and highest address family in the set. */
1048
1049                 SET_FOREACH(afp, c->address_families, i) {
1050                         af = PTR_TO_INT(afp);
1051
1052                         if (af <= 0 || af >= af_max())
1053                                 continue;
1054
1055                         if (first == 0 || af < first)
1056                                 first = af;
1057
1058                         if (last == 0 || af > last)
1059                                 last = af;
1060                 }
1061
1062                 assert((first == 0) == (last == 0));
1063
1064                 if (first == 0) {
1065
1066                         /* No entries in the valid range, block everything */
1067                         r = seccomp_rule_add(
1068                                         seccomp,
1069                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1070                                         SCMP_SYS(socket),
1071                                         0);
1072                         if (r < 0)
1073                                 goto finish;
1074
1075                 } else {
1076
1077                         /* Block everything below the first entry */
1078                         r = seccomp_rule_add(
1079                                         seccomp,
1080                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1081                                         SCMP_SYS(socket),
1082                                         1,
1083                                         SCMP_A0(SCMP_CMP_LT, first));
1084                         if (r < 0)
1085                                 goto finish;
1086
1087                         /* Block everything above the last entry */
1088                         r = seccomp_rule_add(
1089                                         seccomp,
1090                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1091                                         SCMP_SYS(socket),
1092                                         1,
1093                                         SCMP_A0(SCMP_CMP_GT, last));
1094                         if (r < 0)
1095                                 goto finish;
1096
1097                         /* Block everything between the first and last
1098                          * entry */
1099                         for (af = 1; af < af_max(); af++) {
1100
1101                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1102                                         continue;
1103
1104                                 r = seccomp_rule_add(
1105                                                 seccomp,
1106                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1107                                                 SCMP_SYS(socket),
1108                                                 1,
1109                                                 SCMP_A0(SCMP_CMP_EQ, af));
1110                                 if (r < 0)
1111                                         goto finish;
1112                         }
1113                 }
1114
1115         } else {
1116                 void *af;
1117
1118                 /* If this is a blacklist, then generate one rule for
1119                  * each address family that are then combined in OR
1120                  * checks. */
1121
1122                 SET_FOREACH(af, c->address_families, i) {
1123
1124                         r = seccomp_rule_add(
1125                                         seccomp,
1126                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1127                                         SCMP_SYS(socket),
1128                                         1,
1129                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1130                         if (r < 0)
1131                                 goto finish;
1132                 }
1133         }
1134
1135         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1136         if (r < 0)
1137                 goto finish;
1138
1139         r = seccomp_load(seccomp);
1140
1141 finish:
1142         seccomp_release(seccomp);
1143         return r;
1144 }
1145
1146 #endif
1147
1148 static void do_idle_pipe_dance(int idle_pipe[4]) {
1149         assert(idle_pipe);
1150
1151
1152         safe_close(idle_pipe[1]);
1153         safe_close(idle_pipe[2]);
1154
1155         if (idle_pipe[0] >= 0) {
1156                 int r;
1157
1158                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1159
1160                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1161                         /* Signal systemd that we are bored and want to continue. */
1162                         write(idle_pipe[3], "x", 1);
1163
1164                         /* Wait for systemd to react to the signal above. */
1165                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1166                 }
1167
1168                 safe_close(idle_pipe[0]);
1169
1170         }
1171
1172         safe_close(idle_pipe[3]);
1173 }
1174
1175 static int build_environment(
1176                 const ExecContext *c,
1177                 unsigned n_fds,
1178                 usec_t watchdog_usec,
1179                 const char *home,
1180                 const char *username,
1181                 const char *shell,
1182                 char ***ret) {
1183
1184         _cleanup_strv_free_ char **our_env = NULL;
1185         unsigned n_env = 0;
1186         char *x;
1187
1188         assert(c);
1189         assert(ret);
1190
1191         our_env = new0(char*, 10);
1192         if (!our_env)
1193                 return -ENOMEM;
1194
1195         if (n_fds > 0) {
1196                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1197                         return -ENOMEM;
1198                 our_env[n_env++] = x;
1199
1200                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1201                         return -ENOMEM;
1202                 our_env[n_env++] = x;
1203         }
1204
1205         if (watchdog_usec > 0) {
1206                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209
1210                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1211                         return -ENOMEM;
1212                 our_env[n_env++] = x;
1213         }
1214
1215         if (home) {
1216                 x = strappend("HOME=", home);
1217                 if (!x)
1218                         return -ENOMEM;
1219                 our_env[n_env++] = x;
1220         }
1221
1222         if (username) {
1223                 x = strappend("LOGNAME=", username);
1224                 if (!x)
1225                         return -ENOMEM;
1226                 our_env[n_env++] = x;
1227
1228                 x = strappend("USER=", username);
1229                 if (!x)
1230                         return -ENOMEM;
1231                 our_env[n_env++] = x;
1232         }
1233
1234         if (shell) {
1235                 x = strappend("SHELL=", shell);
1236                 if (!x)
1237                         return -ENOMEM;
1238                 our_env[n_env++] = x;
1239         }
1240
1241         if (is_terminal_input(c->std_input) ||
1242             c->std_output == EXEC_OUTPUT_TTY ||
1243             c->std_error == EXEC_OUTPUT_TTY ||
1244             c->tty_path) {
1245
1246                 x = strdup(default_term_for_tty(tty_path(c)));
1247                 if (!x)
1248                         return -ENOMEM;
1249                 our_env[n_env++] = x;
1250         }
1251
1252         our_env[n_env++] = NULL;
1253         assert(n_env <= 10);
1254
1255         *ret = our_env;
1256         our_env = NULL;
1257
1258         return 0;
1259 }
1260
1261 static int exec_child(
1262                 ExecCommand *command,
1263                 const ExecContext *context,
1264                 const ExecParameters *params,
1265                 ExecRuntime *runtime,
1266                 char **argv,
1267                 int socket_fd,
1268                 int *fds, unsigned n_fds,
1269                 char **files_env,
1270                 int *exit_status) {
1271
1272         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1273         _cleanup_free_ char *mac_selinux_context_net = NULL;
1274         const char *username = NULL, *home = NULL, *shell = NULL;
1275         unsigned n_dont_close = 0;
1276         int dont_close[n_fds + 4];
1277         uid_t uid = UID_INVALID;
1278         gid_t gid = GID_INVALID;
1279         int i, r;
1280
1281         assert(command);
1282         assert(context);
1283         assert(params);
1284         assert(exit_status);
1285
1286         rename_process_from_path(command->path);
1287
1288         /* We reset exactly these signals, since they are the
1289          * only ones we set to SIG_IGN in the main daemon. All
1290          * others we leave untouched because we set them to
1291          * SIG_DFL or a valid handler initially, both of which
1292          * will be demoted to SIG_DFL. */
1293         default_signals(SIGNALS_CRASH_HANDLER,
1294                         SIGNALS_IGNORE, -1);
1295
1296         if (context->ignore_sigpipe)
1297                 ignore_signals(SIGPIPE, -1);
1298
1299         r = reset_signal_mask();
1300         if (r < 0) {
1301                 *exit_status = EXIT_SIGNAL_MASK;
1302                 return r;
1303         }
1304
1305         if (params->idle_pipe)
1306                 do_idle_pipe_dance(params->idle_pipe);
1307
1308         /* Close sockets very early to make sure we don't
1309          * block init reexecution because it cannot bind its
1310          * sockets */
1311
1312         log_forget_fds();
1313
1314         if (socket_fd >= 0)
1315                 dont_close[n_dont_close++] = socket_fd;
1316         if (n_fds > 0) {
1317                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1318                 n_dont_close += n_fds;
1319         }
1320         if (params->bus_endpoint_fd >= 0)
1321                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1322         if (runtime) {
1323                 if (runtime->netns_storage_socket[0] >= 0)
1324                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1325                 if (runtime->netns_storage_socket[1] >= 0)
1326                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1327         }
1328
1329         r = close_all_fds(dont_close, n_dont_close);
1330         if (r < 0) {
1331                 *exit_status = EXIT_FDS;
1332                 return r;
1333         }
1334
1335         if (!context->same_pgrp)
1336                 if (setsid() < 0) {
1337                         *exit_status = EXIT_SETSID;
1338                         return -errno;
1339                 }
1340
1341         exec_context_tty_reset(context);
1342
1343         if (params->confirm_spawn) {
1344                 char response;
1345
1346                 r = ask_for_confirmation(&response, argv);
1347                 if (r == -ETIMEDOUT)
1348                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1349                 else if (r < 0)
1350                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1351                 else if (response == 's') {
1352                         write_confirm_message("Skipping execution.\n");
1353                         *exit_status = EXIT_CONFIRM;
1354                         return -ECANCELED;
1355                 } else if (response == 'n') {
1356                         write_confirm_message("Failing execution.\n");
1357                         *exit_status = 0;
1358                         return 0;
1359                 }
1360         }
1361
1362         if (context->user) {
1363                 username = context->user;
1364                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1365                 if (r < 0) {
1366                         *exit_status = EXIT_USER;
1367                         return r;
1368                 }
1369         }
1370
1371         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1372          * must sure to drop O_NONBLOCK */
1373         if (socket_fd >= 0)
1374                 fd_nonblock(socket_fd, false);
1375
1376         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1377         if (r < 0) {
1378                 *exit_status = EXIT_STDIN;
1379                 return r;
1380         }
1381
1382         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1383         if (r < 0) {
1384                 *exit_status = EXIT_STDOUT;
1385                 return r;
1386         }
1387
1388         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (r < 0) {
1390                 *exit_status = EXIT_STDERR;
1391                 return r;
1392         }
1393
1394         if (params->cgroup_path) {
1395                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1396                 if (r < 0) {
1397                         *exit_status = EXIT_CGROUP;
1398                         return r;
1399                 }
1400         }
1401
1402         if (context->oom_score_adjust_set) {
1403                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1404
1405                 /* When we can't make this change due to EPERM, then
1406                  * let's silently skip over it. User namespaces
1407                  * prohibit write access to this file, and we
1408                  * shouldn't trip up over that. */
1409
1410                 sprintf(t, "%i", context->oom_score_adjust);
1411                 r = write_string_file("/proc/self/oom_score_adj", t);
1412                 if (r == -EPERM || r == -EACCES) {
1413                         log_open();
1414                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1415                         log_close();
1416                 } else if (r < 0) {
1417                         *exit_status = EXIT_OOM_ADJUST;
1418                         return -errno;
1419                 }
1420         }
1421
1422         if (context->nice_set)
1423                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1424                         *exit_status = EXIT_NICE;
1425                         return -errno;
1426                 }
1427
1428         if (context->cpu_sched_set) {
1429                 struct sched_param param = {
1430                         .sched_priority = context->cpu_sched_priority,
1431                 };
1432
1433                 r = sched_setscheduler(0,
1434                                        context->cpu_sched_policy |
1435                                        (context->cpu_sched_reset_on_fork ?
1436                                         SCHED_RESET_ON_FORK : 0),
1437                                        &param);
1438                 if (r < 0) {
1439                         *exit_status = EXIT_SETSCHEDULER;
1440                         return -errno;
1441                 }
1442         }
1443
1444         if (context->cpuset)
1445                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1446                         *exit_status = EXIT_CPUAFFINITY;
1447                         return -errno;
1448                 }
1449
1450         if (context->ioprio_set)
1451                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1452                         *exit_status = EXIT_IOPRIO;
1453                         return -errno;
1454                 }
1455
1456         if (context->timer_slack_nsec != NSEC_INFINITY)
1457                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1458                         *exit_status = EXIT_TIMERSLACK;
1459                         return -errno;
1460                 }
1461
1462         if (context->personality != 0xffffffffUL)
1463                 if (personality(context->personality) < 0) {
1464                         *exit_status = EXIT_PERSONALITY;
1465                         return -errno;
1466                 }
1467
1468         if (context->utmp_id)
1469                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1470
1471         if (context->user && is_terminal_input(context->std_input)) {
1472                 r = chown_terminal(STDIN_FILENO, uid);
1473                 if (r < 0) {
1474                         *exit_status = EXIT_STDIN;
1475                         return r;
1476                 }
1477         }
1478
1479 #ifdef ENABLE_KDBUS
1480         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1481                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1482
1483                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1484                 if (r < 0) {
1485                         *exit_status = EXIT_BUS_ENDPOINT;
1486                         return r;
1487                 }
1488         }
1489 #endif
1490
1491         /* If delegation is enabled we'll pass ownership of the cgroup
1492          * (but only in systemd's own controller hierarchy!) to the
1493          * user of the new process. */
1494         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1495                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1496                 if (r < 0) {
1497                         *exit_status = EXIT_CGROUP;
1498                         return r;
1499                 }
1500
1501
1502                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1503                 if (r < 0) {
1504                         *exit_status = EXIT_CGROUP;
1505                         return r;
1506                 }
1507         }
1508
1509         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1510                 char **rt;
1511
1512                 STRV_FOREACH(rt, context->runtime_directory) {
1513                         _cleanup_free_ char *p;
1514
1515                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1516                         if (!p) {
1517                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1518                                 return -ENOMEM;
1519                         }
1520
1521                         r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1522                         if (r < 0) {
1523                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1524                                 return r;
1525                         }
1526                 }
1527         }
1528
1529         if (params->apply_permissions) {
1530                 r = enforce_groups(context, username, gid);
1531                 if (r < 0) {
1532                         *exit_status = EXIT_GROUP;
1533                         return r;
1534                 }
1535         }
1536
1537         umask(context->umask);
1538
1539 #ifdef HAVE_PAM
1540         if (params->apply_permissions && context->pam_name && username) {
1541                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1542                 if (r < 0) {
1543                         *exit_status = EXIT_PAM;
1544                         return r;
1545                 }
1546         }
1547 #endif
1548
1549         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1550                 r = setup_netns(runtime->netns_storage_socket);
1551                 if (r < 0) {
1552                         *exit_status = EXIT_NETWORK;
1553                         return r;
1554                 }
1555         }
1556
1557         if (!strv_isempty(context->read_write_dirs) ||
1558             !strv_isempty(context->read_only_dirs) ||
1559             !strv_isempty(context->inaccessible_dirs) ||
1560             context->mount_flags != 0 ||
1561             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1562             params->bus_endpoint_path ||
1563             context->private_devices ||
1564             context->protect_system != PROTECT_SYSTEM_NO ||
1565             context->protect_home != PROTECT_HOME_NO) {
1566
1567                 char *tmp = NULL, *var = NULL;
1568
1569                 /* The runtime struct only contains the parent
1570                  * of the private /tmp, which is
1571                  * non-accessible to world users. Inside of it
1572                  * there's a /tmp that is sticky, and that's
1573                  * the one we want to use here. */
1574
1575                 if (context->private_tmp && runtime) {
1576                         if (runtime->tmp_dir)
1577                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1578                         if (runtime->var_tmp_dir)
1579                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1580                 }
1581
1582                 r = setup_namespace(
1583                                 context->read_write_dirs,
1584                                 context->read_only_dirs,
1585                                 context->inaccessible_dirs,
1586                                 tmp,
1587                                 var,
1588                                 params->bus_endpoint_path,
1589                                 context->private_devices,
1590                                 context->protect_home,
1591                                 context->protect_system,
1592                                 context->mount_flags);
1593
1594                 /* If we couldn't set up the namespace this is
1595                  * probably due to a missing capability. In this case,
1596                  * silently proceeed. */
1597                 if (r == -EPERM || r == -EACCES) {
1598                         log_open();
1599                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1600                         log_close();
1601                 } else if (r < 0) {
1602                         *exit_status = EXIT_NAMESPACE;
1603                         return r;
1604                 }
1605         }
1606
1607         if (params->apply_chroot) {
1608                 if (context->root_directory)
1609                         if (chroot(context->root_directory) < 0) {
1610                                 *exit_status = EXIT_CHROOT;
1611                                 return -errno;
1612                         }
1613
1614                 if (chdir(context->working_directory ?: "/") < 0 &&
1615                     !context->working_directory_missing_ok) {
1616                         *exit_status = EXIT_CHDIR;
1617                         return -errno;
1618                 }
1619         } else {
1620                 _cleanup_free_ char *d = NULL;
1621
1622                 if (asprintf(&d, "%s/%s",
1623                              context->root_directory ?: "",
1624                              context->working_directory ?: "") < 0) {
1625                         *exit_status = EXIT_MEMORY;
1626                         return -ENOMEM;
1627                 }
1628
1629                 if (chdir(d) < 0 &&
1630                     !context->working_directory_missing_ok) {
1631                         *exit_status = EXIT_CHDIR;
1632                         return -errno;
1633                 }
1634         }
1635
1636 #ifdef HAVE_SELINUX
1637         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1638                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1639                 if (r < 0) {
1640                         *exit_status = EXIT_SELINUX_CONTEXT;
1641                         return r;
1642                 }
1643         }
1644 #endif
1645
1646         /* We repeat the fd closing here, to make sure that
1647          * nothing is leaked from the PAM modules. Note that
1648          * we are more aggressive this time since socket_fd
1649          * and the netns fds we don't need anymore. The custom
1650          * endpoint fd was needed to upload the policy and can
1651          * now be closed as well. */
1652         r = close_all_fds(fds, n_fds);
1653         if (r >= 0)
1654                 r = shift_fds(fds, n_fds);
1655         if (r >= 0)
1656                 r = flags_fds(fds, n_fds, context->non_blocking);
1657         if (r < 0) {
1658                 *exit_status = EXIT_FDS;
1659                 return r;
1660         }
1661
1662         if (params->apply_permissions) {
1663
1664                 for (i = 0; i < _RLIMIT_MAX; i++) {
1665                         if (!context->rlimit[i])
1666                                 continue;
1667
1668                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1669                                 *exit_status = EXIT_LIMITS;
1670                                 return -errno;
1671                         }
1672                 }
1673
1674                 if (context->capability_bounding_set_drop) {
1675                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1676                         if (r < 0) {
1677                                 *exit_status = EXIT_CAPABILITIES;
1678                                 return r;
1679                         }
1680                 }
1681
1682 #ifdef HAVE_SMACK
1683                 if (context->smack_process_label) {
1684                         r = mac_smack_apply_pid(0, context->smack_process_label);
1685                         if (r < 0) {
1686                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1687                                 return r;
1688                         }
1689                 }
1690 #endif
1691
1692                 if (context->user) {
1693                         r = enforce_user(context, uid);
1694                         if (r < 0) {
1695                                 *exit_status = EXIT_USER;
1696                                 return r;
1697                         }
1698                 }
1699
1700                 /* PR_GET_SECUREBITS is not privileged, while
1701                  * PR_SET_SECUREBITS is. So to suppress
1702                  * potential EPERMs we'll try not to call
1703                  * PR_SET_SECUREBITS unless necessary. */
1704                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1705                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1706                                 *exit_status = EXIT_SECUREBITS;
1707                                 return -errno;
1708                         }
1709
1710                 if (context->capabilities)
1711                         if (cap_set_proc(context->capabilities) < 0) {
1712                                 *exit_status = EXIT_CAPABILITIES;
1713                                 return -errno;
1714                         }
1715
1716                 if (context->no_new_privileges)
1717                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1718                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1719                                 return -errno;
1720                         }
1721
1722 #ifdef HAVE_SECCOMP
1723                 if (context->address_families_whitelist ||
1724                     !set_isempty(context->address_families)) {
1725                         r = apply_address_families(context);
1726                         if (r < 0) {
1727                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1728                                 return r;
1729                         }
1730                 }
1731
1732                 if (context->syscall_whitelist ||
1733                     !set_isempty(context->syscall_filter) ||
1734                     !set_isempty(context->syscall_archs)) {
1735                         r = apply_seccomp(context);
1736                         if (r < 0) {
1737                                 *exit_status = EXIT_SECCOMP;
1738                                 return r;
1739                         }
1740                 }
1741 #endif
1742
1743 #ifdef HAVE_SELINUX
1744                 if (mac_selinux_use()) {
1745                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1746
1747                         if (exec_context) {
1748                                 r = setexeccon(exec_context);
1749                                 if (r < 0) {
1750                                         *exit_status = EXIT_SELINUX_CONTEXT;
1751                                         return r;
1752                                 }
1753                         }
1754                 }
1755 #endif
1756
1757 #ifdef HAVE_APPARMOR
1758                 if (context->apparmor_profile && mac_apparmor_use()) {
1759                         r = aa_change_onexec(context->apparmor_profile);
1760                         if (r < 0 && !context->apparmor_profile_ignore) {
1761                                 *exit_status = EXIT_APPARMOR_PROFILE;
1762                                 return -errno;
1763                         }
1764                 }
1765 #endif
1766         }
1767
1768         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1769         if (r < 0) {
1770                 *exit_status = EXIT_MEMORY;
1771                 return r;
1772         }
1773
1774         final_env = strv_env_merge(5,
1775                                    params->environment,
1776                                    our_env,
1777                                    context->environment,
1778                                    files_env,
1779                                    pam_env,
1780                                    NULL);
1781         if (!final_env) {
1782                 *exit_status = EXIT_MEMORY;
1783                 return -ENOMEM;
1784         }
1785
1786         final_argv = replace_env_argv(argv, final_env);
1787         if (!final_argv) {
1788                 *exit_status = EXIT_MEMORY;
1789                 return -ENOMEM;
1790         }
1791
1792         final_env = strv_env_clean(final_env);
1793
1794         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1795                 _cleanup_free_ char *line;
1796
1797                 line = exec_command_line(final_argv);
1798                 if (line) {
1799                         log_open();
1800                         log_unit_struct(params->unit_id,
1801                                         LOG_DEBUG,
1802                                         "EXECUTABLE=%s", command->path,
1803                                         LOG_MESSAGE("Executing: %s", line),
1804                                         NULL);
1805                         log_close();
1806                 }
1807         }
1808         execve(command->path, final_argv, final_env);
1809         *exit_status = EXIT_EXEC;
1810         return -errno;
1811 }
1812
1813 int exec_spawn(ExecCommand *command,
1814                const ExecContext *context,
1815                const ExecParameters *params,
1816                ExecRuntime *runtime,
1817                pid_t *ret) {
1818
1819         _cleanup_strv_free_ char **files_env = NULL;
1820         int *fds = NULL; unsigned n_fds = 0;
1821         _cleanup_free_ char *line = NULL;
1822         int socket_fd, r;
1823         char **argv;
1824         pid_t pid;
1825
1826         assert(command);
1827         assert(context);
1828         assert(ret);
1829         assert(params);
1830         assert(params->fds || params->n_fds <= 0);
1831
1832         if (context->std_input == EXEC_INPUT_SOCKET ||
1833             context->std_output == EXEC_OUTPUT_SOCKET ||
1834             context->std_error == EXEC_OUTPUT_SOCKET) {
1835
1836                 if (params->n_fds != 1) {
1837                         log_unit_error(params->unit_id, "Got more than one socket.");
1838                         return -EINVAL;
1839                 }
1840
1841                 socket_fd = params->fds[0];
1842         } else {
1843                 socket_fd = -1;
1844                 fds = params->fds;
1845                 n_fds = params->n_fds;
1846         }
1847
1848         r = exec_context_load_environment(context, params->unit_id, &files_env);
1849         if (r < 0)
1850                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1851
1852         argv = params->argv ?: command->argv;
1853         line = exec_command_line(argv);
1854         if (!line)
1855                 return log_oom();
1856
1857         log_unit_struct(params->unit_id,
1858                         LOG_DEBUG,
1859                         "EXECUTABLE=%s", command->path,
1860                         LOG_MESSAGE("About to execute: %s", line),
1861                         NULL);
1862         pid = fork();
1863         if (pid < 0)
1864                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1865
1866         if (pid == 0) {
1867                 int exit_status;
1868
1869                 r = exec_child(command,
1870                                context,
1871                                params,
1872                                runtime,
1873                                argv,
1874                                socket_fd,
1875                                fds, n_fds,
1876                                files_env,
1877                                &exit_status);
1878                 if (r < 0) {
1879                         log_open();
1880                         log_unit_struct(params->unit_id,
1881                                         LOG_ERR,
1882                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1883                                         "EXECUTABLE=%s", command->path,
1884                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1885                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1886                                                     command->path, strerror(-r)),
1887                                         LOG_ERRNO(r),
1888                                         NULL);
1889                 }
1890
1891                 _exit(exit_status);
1892         }
1893
1894         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1895
1896         /* We add the new process to the cgroup both in the child (so
1897          * that we can be sure that no user code is ever executed
1898          * outside of the cgroup) and in the parent (so that we can be
1899          * sure that when we kill the cgroup the process will be
1900          * killed too). */
1901         if (params->cgroup_path)
1902                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1903
1904         exec_status_start(&command->exec_status, pid);
1905
1906         *ret = pid;
1907         return 0;
1908 }
1909
1910 void exec_context_init(ExecContext *c) {
1911         assert(c);
1912
1913         c->umask = 0022;
1914         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1915         c->cpu_sched_policy = SCHED_OTHER;
1916         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1917         c->syslog_level_prefix = true;
1918         c->ignore_sigpipe = true;
1919         c->timer_slack_nsec = NSEC_INFINITY;
1920         c->personality = 0xffffffffUL;
1921         c->runtime_directory_mode = 0755;
1922 }
1923
1924 void exec_context_done(ExecContext *c) {
1925         unsigned l;
1926
1927         assert(c);
1928
1929         strv_free(c->environment);
1930         c->environment = NULL;
1931
1932         strv_free(c->environment_files);
1933         c->environment_files = NULL;
1934
1935         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1936                 free(c->rlimit[l]);
1937                 c->rlimit[l] = NULL;
1938         }
1939
1940         free(c->working_directory);
1941         c->working_directory = NULL;
1942         free(c->root_directory);
1943         c->root_directory = NULL;
1944
1945         free(c->tty_path);
1946         c->tty_path = NULL;
1947
1948         free(c->syslog_identifier);
1949         c->syslog_identifier = NULL;
1950
1951         free(c->user);
1952         c->user = NULL;
1953
1954         free(c->group);
1955         c->group = NULL;
1956
1957         strv_free(c->supplementary_groups);
1958         c->supplementary_groups = NULL;
1959
1960         free(c->pam_name);
1961         c->pam_name = NULL;
1962
1963         if (c->capabilities) {
1964                 cap_free(c->capabilities);
1965                 c->capabilities = NULL;
1966         }
1967
1968         strv_free(c->read_only_dirs);
1969         c->read_only_dirs = NULL;
1970
1971         strv_free(c->read_write_dirs);
1972         c->read_write_dirs = NULL;
1973
1974         strv_free(c->inaccessible_dirs);
1975         c->inaccessible_dirs = NULL;
1976
1977         if (c->cpuset)
1978                 CPU_FREE(c->cpuset);
1979
1980         free(c->utmp_id);
1981         c->utmp_id = NULL;
1982
1983         free(c->selinux_context);
1984         c->selinux_context = NULL;
1985
1986         free(c->apparmor_profile);
1987         c->apparmor_profile = NULL;
1988
1989         set_free(c->syscall_filter);
1990         c->syscall_filter = NULL;
1991
1992         set_free(c->syscall_archs);
1993         c->syscall_archs = NULL;
1994
1995         set_free(c->address_families);
1996         c->address_families = NULL;
1997
1998         strv_free(c->runtime_directory);
1999         c->runtime_directory = NULL;
2000
2001         bus_endpoint_free(c->bus_endpoint);
2002         c->bus_endpoint = NULL;
2003 }
2004
2005 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2006         char **i;
2007
2008         assert(c);
2009
2010         if (!runtime_prefix)
2011                 return 0;
2012
2013         STRV_FOREACH(i, c->runtime_directory) {
2014                 _cleanup_free_ char *p;
2015
2016                 p = strjoin(runtime_prefix, "/", *i, NULL);
2017                 if (!p)
2018                         return -ENOMEM;
2019
2020                 /* We execute this synchronously, since we need to be
2021                  * sure this is gone when we start the service
2022                  * next. */
2023                 rm_rf(p, false, true, false);
2024         }
2025
2026         return 0;
2027 }
2028
2029 void exec_command_done(ExecCommand *c) {
2030         assert(c);
2031
2032         free(c->path);
2033         c->path = NULL;
2034
2035         strv_free(c->argv);
2036         c->argv = NULL;
2037 }
2038
2039 void exec_command_done_array(ExecCommand *c, unsigned n) {
2040         unsigned i;
2041
2042         for (i = 0; i < n; i++)
2043                 exec_command_done(c+i);
2044 }
2045
2046 ExecCommand* exec_command_free_list(ExecCommand *c) {
2047         ExecCommand *i;
2048
2049         while ((i = c)) {
2050                 LIST_REMOVE(command, c, i);
2051                 exec_command_done(i);
2052                 free(i);
2053         }
2054
2055         return NULL;
2056 }
2057
2058 void exec_command_free_array(ExecCommand **c, unsigned n) {
2059         unsigned i;
2060
2061         for (i = 0; i < n; i++)
2062                 c[i] = exec_command_free_list(c[i]);
2063 }
2064
/* Context handed to invalid_env() while cleaning up the variables read from
 * an environment file. */
typedef struct InvalidEnvInfo InvalidEnvInfo;

struct InvalidEnvInfo {
        /* Unit the log message is attributed to */
        const char *unit_id;

        /* Path of the environment file the assignment was read from */
        const char *path;
};
2069
2070 static void invalid_env(const char *p, void *userdata) {
2071         InvalidEnvInfo *info = userdata;
2072
2073         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2074 }
2075
2076 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2077         char **i, **r = NULL;
2078
2079         assert(c);
2080         assert(l);
2081
2082         STRV_FOREACH(i, c->environment_files) {
2083                 char *fn;
2084                 int k;
2085                 bool ignore = false;
2086                 char **p;
2087                 _cleanup_globfree_ glob_t pglob = {};
2088                 int count, n;
2089
2090                 fn = *i;
2091
2092                 if (fn[0] == '-') {
2093                         ignore = true;
2094                         fn ++;
2095                 }
2096
2097                 if (!path_is_absolute(fn)) {
2098                         if (ignore)
2099                                 continue;
2100
2101                         strv_free(r);
2102                         return -EINVAL;
2103                 }
2104
2105                 /* Filename supports globbing, take all matching files */
2106                 errno = 0;
2107                 if (glob(fn, 0, NULL, &pglob) != 0) {
2108                         if (ignore)
2109                                 continue;
2110
2111                         strv_free(r);
2112                         return errno ? -errno : -EINVAL;
2113                 }
2114                 count = pglob.gl_pathc;
2115                 if (count == 0) {
2116                         if (ignore)
2117                                 continue;
2118
2119                         strv_free(r);
2120                         return -EINVAL;
2121                 }
2122                 for (n = 0; n < count; n++) {
2123                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2124                         if (k < 0) {
2125                                 if (ignore)
2126                                         continue;
2127
2128                                 strv_free(r);
2129                                 return k;
2130                         }
2131                         /* Log invalid environment variables with filename */
2132                         if (p) {
2133                                 InvalidEnvInfo info = {
2134                                         .unit_id = unit_id,
2135                                         .path = pglob.gl_pathv[n]
2136                                 };
2137
2138                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2139                         }
2140
2141                         if (r == NULL)
2142                                 r = p;
2143                         else {
2144                                 char **m;
2145
2146                                 m = strv_env_merge(2, r, p);
2147                                 strv_free(r);
2148                                 strv_free(p);
2149                                 if (!m)
2150                                         return -ENOMEM;
2151
2152                                 r = m;
2153                         }
2154                 }
2155         }
2156
2157         *l = r;
2158
2159         return 0;
2160 }
2161
2162 static bool tty_may_match_dev_console(const char *tty) {
2163         _cleanup_free_ char *active = NULL;
2164        char *console;
2165
2166         if (startswith(tty, "/dev/"))
2167                 tty += 5;
2168
2169         /* trivial identity? */
2170         if (streq(tty, "console"))
2171                 return true;
2172
2173         console = resolve_dev_console(&active);
2174         /* if we could not resolve, assume it may */
2175         if (!console)
2176                 return true;
2177
2178         /* "tty0" means the active VC, so it may be the same sometimes */
2179         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2180 }
2181
2182 bool exec_context_may_touch_console(ExecContext *ec) {
2183         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2184                 is_terminal_input(ec->std_input) ||
2185                 is_terminal_output(ec->std_output) ||
2186                 is_terminal_output(ec->std_error)) &&
2187                tty_may_match_dev_console(tty_path(ec));
2188 }
2189
2190 static void strv_fprintf(FILE *f, char **l) {
2191         char **g;
2192
2193         assert(f);
2194
2195         STRV_FOREACH(g, l)
2196                 fprintf(f, " %s", *g);
2197 }
2198
2199 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2200         char **e;
2201         unsigned i;
2202
2203         assert(c);
2204         assert(f);
2205
2206         prefix = strempty(prefix);
2207
2208         fprintf(f,
2209                 "%sUMask: %04o\n"
2210                 "%sWorkingDirectory: %s\n"
2211                 "%sRootDirectory: %s\n"
2212                 "%sNonBlocking: %s\n"
2213                 "%sPrivateTmp: %s\n"
2214                 "%sPrivateNetwork: %s\n"
2215                 "%sPrivateDevices: %s\n"
2216                 "%sProtectHome: %s\n"
2217                 "%sProtectSystem: %s\n"
2218                 "%sIgnoreSIGPIPE: %s\n",
2219                 prefix, c->umask,
2220                 prefix, c->working_directory ? c->working_directory : "/",
2221                 prefix, c->root_directory ? c->root_directory : "/",
2222                 prefix, yes_no(c->non_blocking),
2223                 prefix, yes_no(c->private_tmp),
2224                 prefix, yes_no(c->private_network),
2225                 prefix, yes_no(c->private_devices),
2226                 prefix, protect_home_to_string(c->protect_home),
2227                 prefix, protect_system_to_string(c->protect_system),
2228                 prefix, yes_no(c->ignore_sigpipe));
2229
2230         STRV_FOREACH(e, c->environment)
2231                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2232
2233         STRV_FOREACH(e, c->environment_files)
2234                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2235
2236         if (c->nice_set)
2237                 fprintf(f,
2238                         "%sNice: %i\n",
2239                         prefix, c->nice);
2240
2241         if (c->oom_score_adjust_set)
2242                 fprintf(f,
2243                         "%sOOMScoreAdjust: %i\n",
2244                         prefix, c->oom_score_adjust);
2245
2246         for (i = 0; i < RLIM_NLIMITS; i++)
2247                 if (c->rlimit[i])
2248                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2249                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2250
2251         if (c->ioprio_set) {
2252                 _cleanup_free_ char *class_str = NULL;
2253
2254                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2255                 fprintf(f,
2256                         "%sIOSchedulingClass: %s\n"
2257                         "%sIOPriority: %i\n",
2258                         prefix, strna(class_str),
2259                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2260         }
2261
2262         if (c->cpu_sched_set) {
2263                 _cleanup_free_ char *policy_str = NULL;
2264
2265                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2266                 fprintf(f,
2267                         "%sCPUSchedulingPolicy: %s\n"
2268                         "%sCPUSchedulingPriority: %i\n"
2269                         "%sCPUSchedulingResetOnFork: %s\n",
2270                         prefix, strna(policy_str),
2271                         prefix, c->cpu_sched_priority,
2272                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2273         }
2274
2275         if (c->cpuset) {
2276                 fprintf(f, "%sCPUAffinity:", prefix);
2277                 for (i = 0; i < c->cpuset_ncpus; i++)
2278                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2279                                 fprintf(f, " %u", i);
2280                 fputs("\n", f);
2281         }
2282
2283         if (c->timer_slack_nsec != NSEC_INFINITY)
2284                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2285
2286         fprintf(f,
2287                 "%sStandardInput: %s\n"
2288                 "%sStandardOutput: %s\n"
2289                 "%sStandardError: %s\n",
2290                 prefix, exec_input_to_string(c->std_input),
2291                 prefix, exec_output_to_string(c->std_output),
2292                 prefix, exec_output_to_string(c->std_error));
2293
2294         if (c->tty_path)
2295                 fprintf(f,
2296                         "%sTTYPath: %s\n"
2297                         "%sTTYReset: %s\n"
2298                         "%sTTYVHangup: %s\n"
2299                         "%sTTYVTDisallocate: %s\n",
2300                         prefix, c->tty_path,
2301                         prefix, yes_no(c->tty_reset),
2302                         prefix, yes_no(c->tty_vhangup),
2303                         prefix, yes_no(c->tty_vt_disallocate));
2304
2305         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2306             c->std_output == EXEC_OUTPUT_KMSG ||
2307             c->std_output == EXEC_OUTPUT_JOURNAL ||
2308             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2309             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2310             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2311             c->std_error == EXEC_OUTPUT_SYSLOG ||
2312             c->std_error == EXEC_OUTPUT_KMSG ||
2313             c->std_error == EXEC_OUTPUT_JOURNAL ||
2314             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2317
2318                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2319
2320                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2321                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2322
2323                 fprintf(f,
2324                         "%sSyslogFacility: %s\n"
2325                         "%sSyslogLevel: %s\n",
2326                         prefix, strna(fac_str),
2327                         prefix, strna(lvl_str));
2328         }
2329
2330         if (c->capabilities) {
2331                 _cleanup_cap_free_charp_ char *t;
2332
2333                 t = cap_to_text(c->capabilities, NULL);
2334                 if (t)
2335                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2336         }
2337
2338         if (c->secure_bits)
2339                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2340                         prefix,
2341                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2342                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2343                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2344                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2345                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2346                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2347
2348         if (c->capability_bounding_set_drop) {
2349                 unsigned long l;
2350                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2351
2352                 for (l = 0; l <= cap_last_cap(); l++)
2353                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2354                                 fprintf(f, " %s", strna(capability_to_name(l)));
2355
2356                 fputs("\n", f);
2357         }
2358
2359         if (c->user)
2360                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2361         if (c->group)
2362                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2363
2364         if (strv_length(c->supplementary_groups) > 0) {
2365                 fprintf(f, "%sSupplementaryGroups:", prefix);
2366                 strv_fprintf(f, c->supplementary_groups);
2367                 fputs("\n", f);
2368         }
2369
2370         if (c->pam_name)
2371                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2372
2373         if (strv_length(c->read_write_dirs) > 0) {
2374                 fprintf(f, "%sReadWriteDirs:", prefix);
2375                 strv_fprintf(f, c->read_write_dirs);
2376                 fputs("\n", f);
2377         }
2378
2379         if (strv_length(c->read_only_dirs) > 0) {
2380                 fprintf(f, "%sReadOnlyDirs:", prefix);
2381                 strv_fprintf(f, c->read_only_dirs);
2382                 fputs("\n", f);
2383         }
2384
2385         if (strv_length(c->inaccessible_dirs) > 0) {
2386                 fprintf(f, "%sInaccessibleDirs:", prefix);
2387                 strv_fprintf(f, c->inaccessible_dirs);
2388                 fputs("\n", f);
2389         }
2390
2391         if (c->utmp_id)
2392                 fprintf(f,
2393                         "%sUtmpIdentifier: %s\n",
2394                         prefix, c->utmp_id);
2395
2396         if (c->selinux_context)
2397                 fprintf(f,
2398                         "%sSELinuxContext: %s%s\n",
2399                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2400
2401         if (c->personality != 0xffffffffUL)
2402                 fprintf(f,
2403                         "%sPersonality: %s\n",
2404                         prefix, strna(personality_to_string(c->personality)));
2405
2406         if (c->syscall_filter) {
2407 #ifdef HAVE_SECCOMP
2408                 Iterator j;
2409                 void *id;
2410                 bool first = true;
2411 #endif
2412
2413                 fprintf(f,
2414                         "%sSystemCallFilter: ",
2415                         prefix);
2416
2417                 if (!c->syscall_whitelist)
2418                         fputc('~', f);
2419
2420 #ifdef HAVE_SECCOMP
2421                 SET_FOREACH(id, c->syscall_filter, j) {
2422                         _cleanup_free_ char *name = NULL;
2423
2424                         if (first)
2425                                 first = false;
2426                         else
2427                                 fputc(' ', f);
2428
2429                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2430                         fputs(strna(name), f);
2431                 }
2432 #endif
2433
2434                 fputc('\n', f);
2435         }
2436
2437         if (c->syscall_archs) {
2438 #ifdef HAVE_SECCOMP
2439                 Iterator j;
2440                 void *id;
2441 #endif
2442
2443                 fprintf(f,
2444                         "%sSystemCallArchitectures:",
2445                         prefix);
2446
2447 #ifdef HAVE_SECCOMP
2448                 SET_FOREACH(id, c->syscall_archs, j)
2449                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2450 #endif
2451                 fputc('\n', f);
2452         }
2453
2454         if (c->syscall_errno != 0)
2455                 fprintf(f,
2456                         "%sSystemCallErrorNumber: %s\n",
2457                         prefix, strna(errno_to_name(c->syscall_errno)));
2458
2459         if (c->apparmor_profile)
2460                 fprintf(f,
2461                         "%sAppArmorProfile: %s%s\n",
2462                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2463 }
2464
2465 bool exec_context_maintains_privileges(ExecContext *c) {
2466         assert(c);
2467
2468         /* Returns true if the process forked off would run run under
2469          * an unchanged UID or as root. */
2470
2471         if (!c->user)
2472                 return true;
2473
2474         if (streq(c->user, "root") || streq(c->user, "0"))
2475                 return true;
2476
2477         return false;
2478 }
2479
2480 void exec_status_start(ExecStatus *s, pid_t pid) {
2481         assert(s);
2482
2483         zero(*s);
2484         s->pid = pid;
2485         dual_timestamp_get(&s->start_timestamp);
2486 }
2487
2488 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2489         assert(s);
2490
2491         if (s->pid && s->pid != pid)
2492                 zero(*s);
2493
2494         s->pid = pid;
2495         dual_timestamp_get(&s->exit_timestamp);
2496
2497         s->code = code;
2498         s->status = status;
2499
2500         if (context) {
2501                 if (context->utmp_id)
2502                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2503
2504                 exec_context_tty_reset(context);
2505         }
2506 }
2507
2508 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2509         char buf[FORMAT_TIMESTAMP_MAX];
2510
2511         assert(s);
2512         assert(f);
2513
2514         if (s->pid <= 0)
2515                 return;
2516
2517         prefix = strempty(prefix);
2518
2519         fprintf(f,
2520                 "%sPID: "PID_FMT"\n",
2521                 prefix, s->pid);
2522
2523         if (s->start_timestamp.realtime > 0)
2524                 fprintf(f,
2525                         "%sStart Timestamp: %s\n",
2526                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2527
2528         if (s->exit_timestamp.realtime > 0)
2529                 fprintf(f,
2530                         "%sExit Timestamp: %s\n"
2531                         "%sExit Code: %s\n"
2532                         "%sExit Status: %i\n",
2533                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2534                         prefix, sigchld_code_to_string(s->code),
2535                         prefix, s->status);
2536 }
2537
2538 char *exec_command_line(char **argv) {
2539         size_t k;
2540         char *n, *p, **a;
2541         bool first = true;
2542
2543         assert(argv);
2544
2545         k = 1;
2546         STRV_FOREACH(a, argv)
2547                 k += strlen(*a)+3;
2548
2549         if (!(n = new(char, k)))
2550                 return NULL;
2551
2552         p = n;
2553         STRV_FOREACH(a, argv) {
2554
2555                 if (!first)
2556                         *(p++) = ' ';
2557                 else
2558                         first = false;
2559
2560                 if (strpbrk(*a, WHITESPACE)) {
2561                         *(p++) = '\'';
2562                         p = stpcpy(p, *a);
2563                         *(p++) = '\'';
2564                 } else
2565                         p = stpcpy(p, *a);
2566
2567         }
2568
2569         *p = 0;
2570
2571         /* FIXME: this doesn't really handle arguments that have
2572          * spaces and ticks in them */
2573
2574         return n;
2575 }
2576
2577 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2578         _cleanup_free_ char *cmd = NULL;
2579         const char *prefix2;
2580
2581         assert(c);
2582         assert(f);
2583
2584         prefix = strempty(prefix);
2585         prefix2 = strjoina(prefix, "\t");
2586
2587         cmd = exec_command_line(c->argv);
2588         fprintf(f,
2589                 "%sCommand Line: %s\n",
2590                 prefix, cmd ? cmd : strerror(ENOMEM));
2591
2592         exec_status_dump(&c->exec_status, f, prefix2);
2593 }
2594
2595 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2596         assert(f);
2597
2598         prefix = strempty(prefix);
2599
2600         LIST_FOREACH(command, c, c)
2601                 exec_command_dump(c, f, prefix);
2602 }
2603
2604 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2605         ExecCommand *end;
2606
2607         assert(l);
2608         assert(e);
2609
2610         if (*l) {
2611                 /* It's kind of important, that we keep the order here */
2612                 LIST_FIND_TAIL(command, *l, end);
2613                 LIST_INSERT_AFTER(command, *l, end, e);
2614         } else
2615               *l = e;
2616 }
2617
2618 int exec_command_set(ExecCommand *c, const char *path, ...) {
2619         va_list ap;
2620         char **l, *p;
2621
2622         assert(c);
2623         assert(path);
2624
2625         va_start(ap, path);
2626         l = strv_new_ap(path, ap);
2627         va_end(ap);
2628
2629         if (!l)
2630                 return -ENOMEM;
2631
2632         p = strdup(path);
2633         if (!p) {
2634                 strv_free(l);
2635                 return -ENOMEM;
2636         }
2637
2638         free(c->path);
2639         c->path = p;
2640
2641         strv_free(c->argv);
2642         c->argv = l;
2643
2644         return 0;
2645 }
2646
2647 int exec_command_append(ExecCommand *c, const char *path, ...) {
2648         _cleanup_strv_free_ char **l = NULL;
2649         va_list ap;
2650         int r;
2651
2652         assert(c);
2653         assert(path);
2654
2655         va_start(ap, path);
2656         l = strv_new_ap(path, ap);
2657         va_end(ap);
2658
2659         if (!l)
2660                 return -ENOMEM;
2661
2662         r = strv_extend_strv(&c->argv, l);
2663         if (r < 0)
2664                 return r;
2665
2666         return 0;
2667 }
2668
2669
2670 static int exec_runtime_allocate(ExecRuntime **rt) {
2671
2672         if (*rt)
2673                 return 0;
2674
2675         *rt = new0(ExecRuntime, 1);
2676         if (!*rt)
2677                 return -ENOMEM;
2678
2679         (*rt)->n_ref = 1;
2680         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2681
2682         return 0;
2683 }
2684
2685 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2686         int r;
2687
2688         assert(rt);
2689         assert(c);
2690         assert(id);
2691
2692         if (*rt)
2693                 return 1;
2694
2695         if (!c->private_network && !c->private_tmp)
2696                 return 0;
2697
2698         r = exec_runtime_allocate(rt);
2699         if (r < 0)
2700                 return r;
2701
2702         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2703                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2704                         return -errno;
2705         }
2706
2707         if (c->private_tmp && !(*rt)->tmp_dir) {
2708                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2709                 if (r < 0)
2710                         return r;
2711         }
2712
2713         return 1;
2714 }
2715
2716 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2717         assert(r);
2718         assert(r->n_ref > 0);
2719
2720         r->n_ref++;
2721         return r;
2722 }
2723
2724 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2725
2726         if (!r)
2727                 return NULL;
2728
2729         assert(r->n_ref > 0);
2730
2731         r->n_ref--;
2732         if (r->n_ref <= 0) {
2733                 free(r->tmp_dir);
2734                 free(r->var_tmp_dir);
2735                 safe_close_pair(r->netns_storage_socket);
2736                 free(r);
2737         }
2738
2739         return NULL;
2740 }
2741
2742 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2743         assert(u);
2744         assert(f);
2745         assert(fds);
2746
2747         if (!rt)
2748                 return 0;
2749
2750         if (rt->tmp_dir)
2751                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2752
2753         if (rt->var_tmp_dir)
2754                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2755
2756         if (rt->netns_storage_socket[0] >= 0) {
2757                 int copy;
2758
2759                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2760                 if (copy < 0)
2761                         return copy;
2762
2763                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2764         }
2765
2766         if (rt->netns_storage_socket[1] >= 0) {
2767                 int copy;
2768
2769                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2770                 if (copy < 0)
2771                         return copy;
2772
2773                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2774         }
2775
2776         return 0;
2777 }
2778
2779 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2780         int r;
2781
2782         assert(rt);
2783         assert(key);
2784         assert(value);
2785
2786         if (streq(key, "tmp-dir")) {
2787                 char *copy;
2788
2789                 r = exec_runtime_allocate(rt);
2790                 if (r < 0)
2791                         return r;
2792
2793                 copy = strdup(value);
2794                 if (!copy)
2795                         return log_oom();
2796
2797                 free((*rt)->tmp_dir);
2798                 (*rt)->tmp_dir = copy;
2799
2800         } else if (streq(key, "var-tmp-dir")) {
2801                 char *copy;
2802
2803                 r = exec_runtime_allocate(rt);
2804                 if (r < 0)
2805                         return r;
2806
2807                 copy = strdup(value);
2808                 if (!copy)
2809                         return log_oom();
2810
2811                 free((*rt)->var_tmp_dir);
2812                 (*rt)->var_tmp_dir = copy;
2813
2814         } else if (streq(key, "netns-socket-0")) {
2815                 int fd;
2816
2817                 r = exec_runtime_allocate(rt);
2818                 if (r < 0)
2819                         return r;
2820
2821                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2822                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2823                 else {
2824                         safe_close((*rt)->netns_storage_socket[0]);
2825                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2826                 }
2827         } else if (streq(key, "netns-socket-1")) {
2828                 int fd;
2829
2830                 r = exec_runtime_allocate(rt);
2831                 if (r < 0)
2832                         return r;
2833
2834                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2835                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2836                 else {
2837                         safe_close((*rt)->netns_storage_socket[1]);
2838                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2839                 }
2840         } else
2841                 return 0;
2842
2843         return 1;
2844 }
2845
/* Job routine handed to asynchronous_job() by exec_runtime_destroy(): p is
 * a heap-allocated path string owned by this function. The path is passed
 * to rm_rf_dangerous() and the string is freed afterwards. Always returns
 * NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2852
2853 void exec_runtime_destroy(ExecRuntime *rt) {
2854         int r;
2855
2856         if (!rt)
2857                 return;
2858
2859         /* If there are multiple users of this, let's leave the stuff around */
2860         if (rt->n_ref > 1)
2861                 return;
2862
2863         if (rt->tmp_dir) {
2864                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2865
2866                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2867                 if (r < 0) {
2868                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2869                         free(rt->tmp_dir);
2870                 }
2871
2872                 rt->tmp_dir = NULL;
2873         }
2874
2875         if (rt->var_tmp_dir) {
2876                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2877
2878                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2879                 if (r < 0) {
2880                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2881                         free(rt->var_tmp_dir);
2882                 }
2883
2884                 rt->var_tmp_dir = NULL;
2885         }
2886
2887         safe_close_pair(rt->netns_storage_socket);
2888 }
2889
2890 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2891         [EXEC_INPUT_NULL] = "null",
2892         [EXEC_INPUT_TTY] = "tty",
2893         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2894         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2895         [EXEC_INPUT_SOCKET] = "socket"
2896 };
2897
2898 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2899
2900 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2901         [EXEC_OUTPUT_INHERIT] = "inherit",
2902         [EXEC_OUTPUT_NULL] = "null",
2903         [EXEC_OUTPUT_TTY] = "tty",
2904         [EXEC_OUTPUT_SYSLOG] = "syslog",
2905         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2906         [EXEC_OUTPUT_KMSG] = "kmsg",
2907         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2908         [EXEC_OUTPUT_JOURNAL] = "journal",
2909         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2910         [EXEC_OUTPUT_SOCKET] = "socket"
2911 };
2912
2913 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);