chiark / gitweb /
tree-wide: remove unnecessary LOG_PRI
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. the array occupies a consecutive range
 * starting right after stdin/stdout/stderr.
 *
 * Note that the fds array is modified in place: every entry is replaced
 * by the descriptor it was moved to.
 *
 * Returns 0 on success, or a negative errno-style value if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int target = idx + 3;
                        int moved;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* F_DUPFD hands out the lowest free descriptor >= target.
                         * If the slot we wanted was still occupied, note the
                         * first such index so that another pass starts there. */
                        if (moved != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Applies the requested O_NONBLOCK state to each of the passed fds and
 * clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success, or the negative error of the first helper call
 * that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always turned off: these descriptors
                 * are meant to be handed over to our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_path)
178                 return context->tty_path;
179
180         return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184         assert(context);
185
186         if (context->tty_vhangup)
187                 terminal_vhangup(tty_path(context));
188
189         if (context->tty_reset)
190                 reset_terminal(tty_path(context));
191
192         if (context->tty_vt_disallocate && context->tty_path)
193                 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197         return
198                 o == EXEC_OUTPUT_TTY ||
199                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes
 * it available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the very number we wanted */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
221
222 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227         uid_t olduid = UID_INVALID;
228         gid_t oldgid = GID_INVALID;
229         int r;
230
231         if (gid != GID_INVALID) {
232                 oldgid = getgid();
233
234                 r = setegid(gid);
235                 if (r < 0)
236                         return -errno;
237         }
238
239         if (uid != UID_INVALID) {
240                 olduid = getuid();
241
242                 r = seteuid(uid);
243                 if (r < 0) {
244                         r = -errno;
245                         goto restore_gid;
246                 }
247         }
248
249         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
250         if (r < 0)
251                 r = -errno;
252
253         /* If we fail to restore the uid or gid, things will likely
254            fail later on. This should only happen if an LSM interferes. */
255
256         if (uid != UID_INVALID)
257                 (void) seteuid(olduid);
258
259  restore_gid:
260         if (gid != GID_INVALID)
261                 (void) setegid(oldgid);
262
263         return r;
264 }
265
266 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
267         int fd, r;
268
269         assert(context);
270         assert(output < _EXEC_OUTPUT_MAX);
271         assert(ident);
272         assert(nfd >= 0);
273
274         fd = socket(AF_UNIX, SOCK_STREAM, 0);
275         if (fd < 0)
276                 return -errno;
277
278         r = connect_journal_socket(fd, uid, gid);
279         if (r < 0)
280                 return r;
281
282         if (shutdown(fd, SHUT_RD) < 0) {
283                 safe_close(fd);
284                 return -errno;
285         }
286
287         fd_inc_sndbuf(fd, SNDBUF_SIZE);
288
289         dprintf(fd,
290                 "%s\n"
291                 "%s\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n"
295                 "%i\n"
296                 "%i\n",
297                 context->syslog_identifier ? context->syslog_identifier : ident,
298                 unit_id,
299                 context->syslog_priority,
300                 !!context->syslog_level_prefix,
301                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
302                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
303                 is_terminal_output(output));
304
305         if (fd != nfd) {
306                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
307                 safe_close(fd);
308         } else
309                 r = nfd;
310
311         return r;
312 }
/* Opens the terminal at path with the given open mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd.
 *
 * Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already got the number we were asked for? */
        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return r;
}
330
331 static bool is_terminal_input(ExecInput i) {
332         return
333                 i == EXEC_INPUT_TTY ||
334                 i == EXEC_INPUT_TTY_FORCE ||
335                 i == EXEC_INPUT_TTY_FAIL;
336 }
337
338 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
339
340         if (is_terminal_input(std_input) && !apply_tty_stdin)
341                 return EXEC_INPUT_NULL;
342
343         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
344                 return EXEC_INPUT_NULL;
345
346         return std_input;
347 }
348
349 static int fixup_output(ExecOutput std_output, int socket_fd) {
350
351         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
352                 return EXEC_OUTPUT_INHERIT;
353
354         return std_output;
355 }
356
357 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
358         ExecInput i;
359
360         assert(context);
361
362         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
363
364         switch (i) {
365
366         case EXEC_INPUT_NULL:
367                 return open_null_as(O_RDONLY, STDIN_FILENO);
368
369         case EXEC_INPUT_TTY:
370         case EXEC_INPUT_TTY_FORCE:
371         case EXEC_INPUT_TTY_FAIL: {
372                 int fd, r;
373
374                 fd = acquire_terminal(tty_path(context),
375                                       i == EXEC_INPUT_TTY_FAIL,
376                                       i == EXEC_INPUT_TTY_FORCE,
377                                       false,
378                                       USEC_INFINITY);
379                 if (fd < 0)
380                         return fd;
381
382                 if (fd != STDIN_FILENO) {
383                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
384                         safe_close(fd);
385                 } else
386                         r = STDIN_FILENO;
387
388                 return r;
389         }
390
391         case EXEC_INPUT_SOCKET:
392                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
393
394         default:
395                 assert_not_reached("Unknown input type");
396         }
397 }
398
399 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
400         ExecOutput o;
401         ExecInput i;
402         int r;
403
404         assert(context);
405         assert(ident);
406
407         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408         o = fixup_output(context->std_output, socket_fd);
409
410         if (fileno == STDERR_FILENO) {
411                 ExecOutput e;
412                 e = fixup_output(context->std_error, socket_fd);
413
414                 /* This expects the input and output are already set up */
415
416                 /* Don't change the stderr file descriptor if we inherit all
417                  * the way and are not on a tty */
418                 if (e == EXEC_OUTPUT_INHERIT &&
419                     o == EXEC_OUTPUT_INHERIT &&
420                     i == EXEC_INPUT_NULL &&
421                     !is_terminal_input(context->std_input) &&
422                     getppid () != 1)
423                         return fileno;
424
425                 /* Duplicate from stdout if possible */
426                 if (e == o || e == EXEC_OUTPUT_INHERIT)
427                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
428
429                 o = e;
430
431         } else if (o == EXEC_OUTPUT_INHERIT) {
432                 /* If input got downgraded, inherit the original value */
433                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
435
436                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437                 if (i != EXEC_INPUT_NULL)
438                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
439
440                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441                 if (getppid() != 1)
442                         return fileno;
443
444                 /* We need to open /dev/null here anew, to get the right access mode. */
445                 return open_null_as(O_WRONLY, fileno);
446         }
447
448         switch (o) {
449
450         case EXEC_OUTPUT_NULL:
451                 return open_null_as(O_WRONLY, fileno);
452
453         case EXEC_OUTPUT_TTY:
454                 if (is_terminal_input(i))
455                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
456
457                 /* We don't reset the terminal if this is just about output */
458                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
459
460         case EXEC_OUTPUT_SYSLOG:
461         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462         case EXEC_OUTPUT_KMSG:
463         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464         case EXEC_OUTPUT_JOURNAL:
465         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
467                 if (r < 0) {
468                         log_unit_struct(unit_id,
469                                         LOG_ERR,
470                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
471                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
472                                                     unit_id, strerror(-r)),
473                                         LOG_ERRNO(-r),
474                                         NULL);
475                         r = open_null_as(O_WRONLY, fileno);
476                 }
477                 return r;
478
479         case EXEC_OUTPUT_SOCKET:
480                 assert(socket_fd >= 0);
481                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482
483         default:
484                 assert_not_reached("Unknown error type");
485         }
486 }
487
488 static int chown_terminal(int fd, uid_t uid) {
489         struct stat st;
490
491         assert(fd >= 0);
492
493         /* This might fail. What matters are the results. */
494         (void) fchown(fd, uid, -1);
495         (void) fchmod(fd, TTY_MODE);
496
497         if (fstat(fd, &st) < 0)
498                 return -errno;
499
500         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
501                 return -EPERM;
502
503         return 0;
504 }
505
506 static int setup_confirm_stdio(int *_saved_stdin,
507                                int *_saved_stdout) {
508         int fd = -1, saved_stdin, saved_stdout = -1, r;
509
510         assert(_saved_stdin);
511         assert(_saved_stdout);
512
513         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
514         if (saved_stdin < 0)
515                 return -errno;
516
517         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518         if (saved_stdout < 0) {
519                 r = errno;
520                 goto fail;
521         }
522
523         fd = acquire_terminal(
524                         "/dev/console",
525                         false,
526                         false,
527                         false,
528                         DEFAULT_CONFIRM_USEC);
529         if (fd < 0) {
530                 r = fd;
531                 goto fail;
532         }
533
534         r = chown_terminal(fd, getuid());
535         if (r < 0)
536                 goto fail;
537
538         if (dup2(fd, STDIN_FILENO) < 0) {
539                 r = -errno;
540                 goto fail;
541         }
542
543         if (dup2(fd, STDOUT_FILENO) < 0) {
544                 r = -errno;
545                 goto fail;
546         }
547
548         if (fd >= 2)
549                 safe_close(fd);
550
551         *_saved_stdin = saved_stdin;
552         *_saved_stdout = saved_stdout;
553
554         return 0;
555
556 fail:
557         safe_close(saved_stdout);
558         safe_close(saved_stdin);
559         safe_close(fd);
560
561         return r;
562 }
563
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565         _cleanup_close_ int fd = -1;
566         va_list ap;
567
568         assert(format);
569
570         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
571         if (fd < 0)
572                 return fd;
573
574         va_start(ap, format);
575         vdprintf(fd, format, ap);
576         va_end(ap);
577
578         return 0;
579 }
580
/* Releases the terminal, puts the saved descriptors back in place as
 * stdin and stdout, and closes the saved copies.
 *
 * Returns 0 on success, or the negative errno-style value of the last
 * dup2() that failed. Both descriptors are attempted in any case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
604
605 static int ask_for_confirmation(char *response, char **argv) {
606         int saved_stdout = -1, saved_stdin = -1, r;
607         _cleanup_free_ char *line = NULL;
608
609         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
610         if (r < 0)
611                 return r;
612
613         line = exec_command_line(argv);
614         if (!line)
615                 return -ENOMEM;
616
617         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
618
619         restore_confirm_stdio(&saved_stdin, &saved_stdout);
620
621         return r;
622 }
623
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625         bool keep_groups = false;
626         int r;
627
628         assert(context);
629
630         /* Lookup and set GID and supplementary group list. Here too
631          * we avoid NSS lookups for gid=0. */
632
633         if (context->group || username) {
634
635                 if (context->group) {
636                         const char *g = context->group;
637
638                         if ((r = get_group_creds(&g, &gid)) < 0)
639                                 return r;
640                 }
641
642                 /* First step, initialize groups from /etc/groups */
643                 if (username && gid != 0) {
644                         if (initgroups(username, gid) < 0)
645                                 return -errno;
646
647                         keep_groups = true;
648                 }
649
650                 /* Second step, set our gids */
651                 if (setresgid(gid, gid, gid) < 0)
652                         return -errno;
653         }
654
655         if (context->supplementary_groups) {
656                 int ngroups_max, k;
657                 gid_t *gids;
658                 char **i;
659
660                 /* Final step, initialize any manually set supplementary groups */
661                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
662
663                 if (!(gids = new(gid_t, ngroups_max)))
664                         return -ENOMEM;
665
666                 if (keep_groups) {
667                         if ((k = getgroups(ngroups_max, gids)) < 0) {
668                                 free(gids);
669                                 return -errno;
670                         }
671                 } else
672                         k = 0;
673
674                 STRV_FOREACH(i, context->supplementary_groups) {
675                         const char *g;
676
677                         if (k >= ngroups_max) {
678                                 free(gids);
679                                 return -E2BIG;
680                         }
681
682                         g = *i;
683                         r = get_group_creds(&g, gids+k);
684                         if (r < 0) {
685                                 free(gids);
686                                 return r;
687                         }
688
689                         k++;
690                 }
691
692                 if (setgroups(k, gids) < 0) {
693                         free(gids);
694                         return -errno;
695                 }
696
697                 free(gids);
698         }
699
700         return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704         assert(context);
705
706         /* Sets (but doesn't lookup) the uid and make sure we keep the
707          * capabilities while doing so. */
708
709         if (context->capabilities) {
710                 _cleanup_cap_free_ cap_t d = NULL;
711                 static const cap_value_t bits[] = {
712                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
713                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
714                 };
715
716                 /* First step: If we need to keep capabilities but
717                  * drop privileges we need to make sure we keep our
718                  * caps, while we drop privileges. */
719                 if (uid != 0) {
720                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722                         if (prctl(PR_GET_SECUREBITS) != sb)
723                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724                                         return -errno;
725                 }
726
727                 /* Second step: set the capabilities. This will reduce
728                  * the capabilities to the minimum we need. */
729
730                 d = cap_dup(context->capabilities);
731                 if (!d)
732                         return -errno;
733
734                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736                         return -errno;
737
738                 if (cap_set_proc(d) < 0)
739                         return -errno;
740         }
741
742         /* Third step: actually set the uids */
743         if (setresuid(uid, uid, uid) < 0)
744                 return -errno;
745
746         /* At this point we should have all necessary capabilities but
747            are otherwise a normal user. However, the caps might got
748            corrupted due to the setresuid() so we need clean them up
749            later. This is done outside of this call. */
750
751         return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757                 int num_msg,
758                 const struct pam_message **msg,
759                 struct pam_response **resp,
760                 void *appdata_ptr) {
761
762         /* We don't support conversations */
763
764         return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768                 const char *name,
769                 const char *user,
770                 uid_t uid,
771                 const char *tty,
772                 char ***pam_env,
773                 int fds[], unsigned n_fds) {
774
775         static const struct pam_conv conv = {
776                 .conv = null_conv,
777                 .appdata_ptr = NULL
778         };
779
780         pam_handle_t *handle = NULL;
781         sigset_t ss, old_ss;
782         int pam_code = PAM_SUCCESS;
783         int err;
784         char **e = NULL;
785         bool close_session = false;
786         pid_t pam_pid = 0, parent_pid;
787         int flags = 0;
788
789         assert(name);
790         assert(user);
791         assert(pam_env);
792
793         /* We set up PAM in the parent process, then fork. The child
794          * will then stay around until killed via PR_GET_PDEATHSIG or
795          * systemd via the cgroup logic. It will then remove the PAM
796          * session again. The parent process will exec() the actual
797          * daemon. We do things this way to ensure that the main PID
798          * of the daemon is the one we initially fork()ed. */
799
800         if (log_get_max_level() < LOG_DEBUG)
801                 flags |= PAM_SILENT;
802
803         pam_code = pam_start(name, user, &conv, &handle);
804         if (pam_code != PAM_SUCCESS) {
805                 handle = NULL;
806                 goto fail;
807         }
808
809         if (tty) {
810                 pam_code = pam_set_item(handle, PAM_TTY, tty);
811                 if (pam_code != PAM_SUCCESS)
812                         goto fail;
813         }
814
815         pam_code = pam_acct_mgmt(handle, flags);
816         if (pam_code != PAM_SUCCESS)
817                 goto fail;
818
819         pam_code = pam_open_session(handle, flags);
820         if (pam_code != PAM_SUCCESS)
821                 goto fail;
822
823         close_session = true;
824
825         e = pam_getenvlist(handle);
826         if (!e) {
827                 pam_code = PAM_BUF_ERR;
828                 goto fail;
829         }
830
831         /* Block SIGTERM, so that we know that it won't get lost in
832          * the child */
833         if (sigemptyset(&ss) < 0 ||
834             sigaddset(&ss, SIGTERM) < 0 ||
835             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
836                 goto fail;
837
838         parent_pid = getpid();
839
840         pam_pid = fork();
841         if (pam_pid < 0)
842                 goto fail;
843
844         if (pam_pid == 0) {
845                 int sig;
846                 int r = EXIT_PAM;
847
848                 /* The child's job is to reset the PAM session on
849                  * termination */
850
851                 /* This string must fit in 10 chars (i.e. the length
852                  * of "/sbin/init"), to look pretty in /bin/ps */
853                 rename_process("(sd-pam)");
854
855                 /* Make sure we don't keep open the passed fds in this
856                 child. We assume that otherwise only those fds are
857                 open here that have been opened by PAM. */
858                 close_many(fds, n_fds);
859
860                 /* Drop privileges - we don't need any to pam_close_session
861                  * and this will make PR_SET_PDEATHSIG work in most cases.
862                  * If this fails, ignore the error - but expect sd-pam threads
863                  * to fail to exit normally */
864                 if (setresuid(uid, uid, uid) < 0)
865                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
866
867                 /* Wait until our parent died. This will only work if
868                  * the above setresuid() succeeds, otherwise the kernel
869                  * will not allow unprivileged parents kill their privileged
870                  * children this way. We rely on the control groups kill logic
871                  * to do the rest for us. */
872                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873                         goto child_finish;
874
875                 /* Check if our parent process might already have
876                  * died? */
877                 if (getppid() == parent_pid) {
878                         for (;;) {
879                                 if (sigwait(&ss, &sig) < 0) {
880                                         if (errno == EINTR)
881                                                 continue;
882
883                                         goto child_finish;
884                                 }
885
886                                 assert(sig == SIGTERM);
887                                 break;
888                         }
889                 }
890
891                 /* If our parent died we'll end the session */
892                 if (getppid() != parent_pid) {
893                         pam_code = pam_close_session(handle, flags);
894                         if (pam_code != PAM_SUCCESS)
895                                 goto child_finish;
896                 }
897
898                 r = 0;
899
900         child_finish:
901                 pam_end(handle, pam_code | flags);
902                 _exit(r);
903         }
904
905         /* If the child was forked off successfully it will do all the
906          * cleanups, so forget about the handle here. */
907         handle = NULL;
908
909         /* Unblock SIGTERM again in the parent */
910         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
911                 goto fail;
912
913         /* We close the log explicitly here, since the PAM modules
914          * might have opened it, but we don't want this fd around. */
915         closelog();
916
917         *pam_env = e;
918         e = NULL;
919
920         return 0;
921
922 fail:
923         if (pam_code != PAM_SUCCESS) {
924                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
925                 err = -EPERM;  /* PAM errors do not map to errno */
926         } else {
927                 log_error_errno(errno, "PAM failed: %m");
928                 err = -errno;
929         }
930
931         if (handle) {
932                 if (close_session)
933                         pam_code = pam_close_session(handle, flags);
934
935                 pam_end(handle, pam_code | flags);
936         }
937
938         strv_free(e);
939
940         closelog();
941
942         if (pam_pid > 1) {
943                 kill(pam_pid, SIGTERM);
944                 kill(pam_pid, SIGCONT);
945         }
946
947         return err;
948 }
949 #endif
950
/* Renames the process to the base name of path, put in parentheses. Only
 * the last 8 characters of the base name are used, so that the result is
 * at most 10 characters long (i.e. the length of "/sbin/init") and looks
 * pretty in /bin/ps. If the base name is empty, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail rather than the head: the end of the name
                 * is usually more interesting, since the first bit might
                 * just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
981
982 #ifdef HAVE_SECCOMP
983
984 static int apply_seccomp(const ExecContext *c) {
985         uint32_t negative_action, action;
986         scmp_filter_ctx *seccomp;
987         Iterator i;
988         void *id;
989         int r;
990
991         assert(c);
992
993         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
994
995         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
996         if (!seccomp)
997                 return -ENOMEM;
998
999         if (c->syscall_archs) {
1000
1001                 SET_FOREACH(id, c->syscall_archs, i) {
1002                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1003                         if (r == -EEXIST)
1004                                 continue;
1005                         if (r < 0)
1006                                 goto finish;
1007                 }
1008
1009         } else {
1010                 r = seccomp_add_secondary_archs(seccomp);
1011                 if (r < 0)
1012                         goto finish;
1013         }
1014
1015         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1016         SET_FOREACH(id, c->syscall_filter, i) {
1017                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1018                 if (r < 0)
1019                         goto finish;
1020         }
1021
1022         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1023         if (r < 0)
1024                 goto finish;
1025
1026         r = seccomp_load(seccomp);
1027
1028 finish:
1029         seccomp_release(seccomp);
1030         return r;
1031 }
1032
1033 static int apply_address_families(const ExecContext *c) {
1034         scmp_filter_ctx *seccomp;
1035         Iterator i;
1036         int r;
1037
1038         assert(c);
1039
1040         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1041         if (!seccomp)
1042                 return -ENOMEM;
1043
1044         r = seccomp_add_secondary_archs(seccomp);
1045         if (r < 0)
1046                 goto finish;
1047
1048         if (c->address_families_whitelist) {
1049                 int af, first = 0, last = 0;
1050                 void *afp;
1051
1052                 /* If this is a whitelist, we first block the address
1053                  * families that are out of range and then everything
1054                  * that is not in the set. First, we find the lowest
1055                  * and highest address family in the set. */
1056
1057                 SET_FOREACH(afp, c->address_families, i) {
1058                         af = PTR_TO_INT(afp);
1059
1060                         if (af <= 0 || af >= af_max())
1061                                 continue;
1062
1063                         if (first == 0 || af < first)
1064                                 first = af;
1065
1066                         if (last == 0 || af > last)
1067                                 last = af;
1068                 }
1069
1070                 assert((first == 0) == (last == 0));
1071
1072                 if (first == 0) {
1073
1074                         /* No entries in the valid range, block everything */
1075                         r = seccomp_rule_add(
1076                                         seccomp,
1077                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1078                                         SCMP_SYS(socket),
1079                                         0);
1080                         if (r < 0)
1081                                 goto finish;
1082
1083                 } else {
1084
1085                         /* Block everything below the first entry */
1086                         r = seccomp_rule_add(
1087                                         seccomp,
1088                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1089                                         SCMP_SYS(socket),
1090                                         1,
1091                                         SCMP_A0(SCMP_CMP_LT, first));
1092                         if (r < 0)
1093                                 goto finish;
1094
1095                         /* Block everything above the last entry */
1096                         r = seccomp_rule_add(
1097                                         seccomp,
1098                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1099                                         SCMP_SYS(socket),
1100                                         1,
1101                                         SCMP_A0(SCMP_CMP_GT, last));
1102                         if (r < 0)
1103                                 goto finish;
1104
1105                         /* Block everything between the first and last
1106                          * entry */
1107                         for (af = 1; af < af_max(); af++) {
1108
1109                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1110                                         continue;
1111
1112                                 r = seccomp_rule_add(
1113                                                 seccomp,
1114                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1115                                                 SCMP_SYS(socket),
1116                                                 1,
1117                                                 SCMP_A0(SCMP_CMP_EQ, af));
1118                                 if (r < 0)
1119                                         goto finish;
1120                         }
1121                 }
1122
1123         } else {
1124                 void *af;
1125
1126                 /* If this is a blacklist, then generate one rule for
1127                  * each address family that are then combined in OR
1128                  * checks. */
1129
1130                 SET_FOREACH(af, c->address_families, i) {
1131
1132                         r = seccomp_rule_add(
1133                                         seccomp,
1134                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1135                                         SCMP_SYS(socket),
1136                                         1,
1137                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1138                         if (r < 0)
1139                                 goto finish;
1140                 }
1141         }
1142
1143         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1144         if (r < 0)
1145                 goto finish;
1146
1147         r = seccomp_load(seccomp);
1148
1149 finish:
1150         seccomp_release(seccomp);
1151         return r;
1152 }
1153
1154 #endif
1155
1156 static void do_idle_pipe_dance(int idle_pipe[4]) {
1157         assert(idle_pipe);
1158
1159
1160         safe_close(idle_pipe[1]);
1161         safe_close(idle_pipe[2]);
1162
1163         if (idle_pipe[0] >= 0) {
1164                 int r;
1165
1166                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1167
1168                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1169                         /* Signal systemd that we are bored and want to continue. */
1170                         write(idle_pipe[3], "x", 1);
1171
1172                         /* Wait for systemd to react to the signal above. */
1173                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1174                 }
1175
1176                 safe_close(idle_pipe[0]);
1177
1178         }
1179
1180         safe_close(idle_pipe[3]);
1181 }
1182
1183 static int build_environment(
1184                 const ExecContext *c,
1185                 unsigned n_fds,
1186                 usec_t watchdog_usec,
1187                 const char *home,
1188                 const char *username,
1189                 const char *shell,
1190                 char ***ret) {
1191
1192         _cleanup_strv_free_ char **our_env = NULL;
1193         unsigned n_env = 0;
1194         char *x;
1195
1196         assert(c);
1197         assert(ret);
1198
1199         our_env = new0(char*, 10);
1200         if (!our_env)
1201                 return -ENOMEM;
1202
1203         if (n_fds > 0) {
1204                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1205                         return -ENOMEM;
1206                 our_env[n_env++] = x;
1207
1208                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1209                         return -ENOMEM;
1210                 our_env[n_env++] = x;
1211         }
1212
1213         if (watchdog_usec > 0) {
1214                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217
1218                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1219                         return -ENOMEM;
1220                 our_env[n_env++] = x;
1221         }
1222
1223         if (home) {
1224                 x = strappend("HOME=", home);
1225                 if (!x)
1226                         return -ENOMEM;
1227                 our_env[n_env++] = x;
1228         }
1229
1230         if (username) {
1231                 x = strappend("LOGNAME=", username);
1232                 if (!x)
1233                         return -ENOMEM;
1234                 our_env[n_env++] = x;
1235
1236                 x = strappend("USER=", username);
1237                 if (!x)
1238                         return -ENOMEM;
1239                 our_env[n_env++] = x;
1240         }
1241
1242         if (shell) {
1243                 x = strappend("SHELL=", shell);
1244                 if (!x)
1245                         return -ENOMEM;
1246                 our_env[n_env++] = x;
1247         }
1248
1249         if (is_terminal_input(c->std_input) ||
1250             c->std_output == EXEC_OUTPUT_TTY ||
1251             c->std_error == EXEC_OUTPUT_TTY ||
1252             c->tty_path) {
1253
1254                 x = strdup(default_term_for_tty(tty_path(c)));
1255                 if (!x)
1256                         return -ENOMEM;
1257                 our_env[n_env++] = x;
1258         }
1259
1260         our_env[n_env++] = NULL;
1261         assert(n_env <= 10);
1262
1263         *ret = our_env;
1264         our_env = NULL;
1265
1266         return 0;
1267 }
1268
1269 static int exec_child(ExecCommand *command,
1270                       const ExecContext *context,
1271                       const ExecParameters *params,
1272                       ExecRuntime *runtime,
1273                       char **argv,
1274                       int socket_fd,
1275                       int *fds, unsigned n_fds,
1276                       char **files_env,
1277                       int *error) {
1278
1279         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1280         _cleanup_free_ char *mac_selinux_context_net = NULL;
1281         const char *username = NULL, *home = NULL, *shell = NULL;
1282         unsigned n_dont_close = 0;
1283         int dont_close[n_fds + 4];
1284         uid_t uid = UID_INVALID;
1285         gid_t gid = GID_INVALID;
1286         int i, err;
1287
1288         assert(command);
1289         assert(context);
1290         assert(params);
1291         assert(error);
1292
1293         rename_process_from_path(command->path);
1294
1295         /* We reset exactly these signals, since they are the
1296          * only ones we set to SIG_IGN in the main daemon. All
1297          * others we leave untouched because we set them to
1298          * SIG_DFL or a valid handler initially, both of which
1299          * will be demoted to SIG_DFL. */
1300         default_signals(SIGNALS_CRASH_HANDLER,
1301                         SIGNALS_IGNORE, -1);
1302
1303         if (context->ignore_sigpipe)
1304                 ignore_signals(SIGPIPE, -1);
1305
1306         err = reset_signal_mask();
1307         if (err < 0) {
1308                 *error = EXIT_SIGNAL_MASK;
1309                 return err;
1310         }
1311
1312         if (params->idle_pipe)
1313                 do_idle_pipe_dance(params->idle_pipe);
1314
1315         /* Close sockets very early to make sure we don't
1316          * block init reexecution because it cannot bind its
1317          * sockets */
1318         log_forget_fds();
1319
1320         if (socket_fd >= 0)
1321                 dont_close[n_dont_close++] = socket_fd;
1322         if (n_fds > 0) {
1323                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324                 n_dont_close += n_fds;
1325         }
1326         if (params->bus_endpoint_fd >= 0)
1327                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1328         if (runtime) {
1329                 if (runtime->netns_storage_socket[0] >= 0)
1330                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331                 if (runtime->netns_storage_socket[1] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1333         }
1334
1335         err = close_all_fds(dont_close, n_dont_close);
1336         if (err < 0) {
1337                 *error = EXIT_FDS;
1338                 return err;
1339         }
1340
1341         if (!context->same_pgrp)
1342                 if (setsid() < 0) {
1343                         *error = EXIT_SETSID;
1344                         return -errno;
1345                 }
1346
1347         exec_context_tty_reset(context);
1348
1349         if (params->confirm_spawn) {
1350                 char response;
1351
1352                 err = ask_for_confirmation(&response, argv);
1353                 if (err == -ETIMEDOUT)
1354                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1355                 else if (err < 0)
1356                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1357                 else if (response == 's') {
1358                         write_confirm_message("Skipping execution.\n");
1359                         *error = EXIT_CONFIRM;
1360                         return -ECANCELED;
1361                 } else if (response == 'n') {
1362                         write_confirm_message("Failing execution.\n");
1363                         *error = 0;
1364                         return 0;
1365                 }
1366         }
1367
1368         if (context->user) {
1369                 username = context->user;
1370                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                 if (err < 0) {
1372                         *error = EXIT_USER;
1373                         return err;
1374                 }
1375         }
1376
1377         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378          * must sure to drop O_NONBLOCK */
1379         if (socket_fd >= 0)
1380                 fd_nonblock(socket_fd, false);
1381
1382         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1383         if (err < 0) {
1384                 *error = EXIT_STDIN;
1385                 return err;
1386         }
1387
1388         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (err < 0) {
1390                 *error = EXIT_STDOUT;
1391                 return err;
1392         }
1393
1394         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1395         if (err < 0) {
1396                 *error = EXIT_STDERR;
1397                 return err;
1398         }
1399
1400         if (params->cgroup_path) {
1401                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1402                 if (err < 0) {
1403                         *error = EXIT_CGROUP;
1404                         return err;
1405                 }
1406         }
1407
1408         if (context->oom_score_adjust_set) {
1409                 char t[16];
1410
1411                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1412                 char_array_0(t);
1413
1414                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1415                         *error = EXIT_OOM_ADJUST;
1416                         return -errno;
1417                 }
1418         }
1419
1420         if (context->nice_set)
1421                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1422                         *error = EXIT_NICE;
1423                         return -errno;
1424                 }
1425
1426         if (context->cpu_sched_set) {
1427                 struct sched_param param = {
1428                         .sched_priority = context->cpu_sched_priority,
1429                 };
1430
1431                 err = sched_setscheduler(0,
1432                                          context->cpu_sched_policy |
1433                                          (context->cpu_sched_reset_on_fork ?
1434                                           SCHED_RESET_ON_FORK : 0),
1435                                          &param);
1436                 if (err < 0) {
1437                         *error = EXIT_SETSCHEDULER;
1438                         return -errno;
1439                 }
1440         }
1441
1442         if (context->cpuset)
1443                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1444                         *error = EXIT_CPUAFFINITY;
1445                         return -errno;
1446                 }
1447
1448         if (context->ioprio_set)
1449                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1450                         *error = EXIT_IOPRIO;
1451                         return -errno;
1452                 }
1453
1454         if (context->timer_slack_nsec != NSEC_INFINITY)
1455                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1456                         *error = EXIT_TIMERSLACK;
1457                         return -errno;
1458                 }
1459
1460         if (context->personality != 0xffffffffUL)
1461                 if (personality(context->personality) < 0) {
1462                         *error = EXIT_PERSONALITY;
1463                         return -errno;
1464                 }
1465
1466         if (context->utmp_id)
1467                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1468
1469         if (context->user && is_terminal_input(context->std_input)) {
1470                 err = chown_terminal(STDIN_FILENO, uid);
1471                 if (err < 0) {
1472                         *error = EXIT_STDIN;
1473                         return err;
1474                 }
1475         }
1476
1477 #ifdef ENABLE_KDBUS
1478         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1479                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1480
1481                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1482                 if (err < 0) {
1483                         *error = EXIT_BUS_ENDPOINT;
1484                         return err;
1485                 }
1486         }
1487 #endif
1488
1489         /* If delegation is enabled we'll pass ownership of the cgroup
1490          * (but only in systemd's own controller hierarchy!) to the
1491          * user of the new process. */
1492         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1493                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1494                 if (err < 0) {
1495                         *error = EXIT_CGROUP;
1496                         return err;
1497                 }
1498
1499
1500                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1501                 if (err < 0) {
1502                         *error = EXIT_CGROUP;
1503                         return err;
1504                 }
1505         }
1506
1507         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1508                 char **rt;
1509
1510                 STRV_FOREACH(rt, context->runtime_directory) {
1511                         _cleanup_free_ char *p;
1512
1513                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1514                         if (!p) {
1515                                 *error = EXIT_RUNTIME_DIRECTORY;
1516                                 return -ENOMEM;
1517                         }
1518
1519                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1520                         if (err < 0) {
1521                                 *error = EXIT_RUNTIME_DIRECTORY;
1522                                 return err;
1523                         }
1524                 }
1525         }
1526
1527         if (params->apply_permissions) {
1528                 err = enforce_groups(context, username, gid);
1529                 if (err < 0) {
1530                         *error = EXIT_GROUP;
1531                         return err;
1532                 }
1533         }
1534
1535         umask(context->umask);
1536
1537 #ifdef HAVE_PAM
1538         if (params->apply_permissions && context->pam_name && username) {
1539                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1540                 if (err < 0) {
1541                         *error = EXIT_PAM;
1542                         return err;
1543                 }
1544         }
1545 #endif
1546
1547         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1548                 err = setup_netns(runtime->netns_storage_socket);
1549                 if (err < 0) {
1550                         *error = EXIT_NETWORK;
1551                         return err;
1552                 }
1553         }
1554
1555         if (!strv_isempty(context->read_write_dirs) ||
1556             !strv_isempty(context->read_only_dirs) ||
1557             !strv_isempty(context->inaccessible_dirs) ||
1558             context->mount_flags != 0 ||
1559             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1560             params->bus_endpoint_path ||
1561             context->private_devices ||
1562             context->protect_system != PROTECT_SYSTEM_NO ||
1563             context->protect_home != PROTECT_HOME_NO) {
1564
1565                 char *tmp = NULL, *var = NULL;
1566
1567                 /* The runtime struct only contains the parent
1568                  * of the private /tmp, which is
1569                  * non-accessible to world users. Inside of it
1570                  * there's a /tmp that is sticky, and that's
1571                  * the one we want to use here. */
1572
1573                 if (context->private_tmp && runtime) {
1574                         if (runtime->tmp_dir)
1575                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1576                         if (runtime->var_tmp_dir)
1577                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1578                 }
1579
1580                 err = setup_namespace(
1581                                 context->read_write_dirs,
1582                                 context->read_only_dirs,
1583                                 context->inaccessible_dirs,
1584                                 tmp,
1585                                 var,
1586                                 params->bus_endpoint_path,
1587                                 context->private_devices,
1588                                 context->protect_home,
1589                                 context->protect_system,
1590                                 context->mount_flags);
1591
1592                 if (err == -EPERM)
1593                         log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1594                 else if (err < 0) {
1595                         *error = EXIT_NAMESPACE;
1596                         return err;
1597                 }
1598         }
1599
1600         if (params->apply_chroot) {
1601                 if (context->root_directory)
1602                         if (chroot(context->root_directory) < 0) {
1603                                 *error = EXIT_CHROOT;
1604                                 return -errno;
1605                         }
1606
1607                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1608                         *error = EXIT_CHDIR;
1609                         return -errno;
1610                 }
1611         } else {
1612                 _cleanup_free_ char *d = NULL;
1613
1614                 if (asprintf(&d, "%s/%s",
1615                              context->root_directory ? context->root_directory : "",
1616                              context->working_directory ? context->working_directory : "") < 0) {
1617                         *error = EXIT_MEMORY;
1618                         return -ENOMEM;
1619                 }
1620
1621                 if (chdir(d) < 0) {
1622                         *error = EXIT_CHDIR;
1623                         return -errno;
1624                 }
1625         }
1626
1627 #ifdef HAVE_SELINUX
1628         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1629                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1630                 if (err < 0) {
1631                         *error = EXIT_SELINUX_CONTEXT;
1632                         return err;
1633                 }
1634         }
1635 #endif
1636
1637         /* We repeat the fd closing here, to make sure that
1638          * nothing is leaked from the PAM modules. Note that
1639          * we are more aggressive this time since socket_fd
1640          * and the netns fds we don't need anymore. The custom
1641          * endpoint fd was needed to upload the policy and can
1642          * now be closed as well. */
1643         err = close_all_fds(fds, n_fds);
1644         if (err >= 0)
1645                 err = shift_fds(fds, n_fds);
1646         if (err >= 0)
1647                 err = flags_fds(fds, n_fds, context->non_blocking);
1648         if (err < 0) {
1649                 *error = EXIT_FDS;
1650                 return err;
1651         }
1652
1653         if (params->apply_permissions) {
1654
1655                 for (i = 0; i < _RLIMIT_MAX; i++) {
1656                         if (!context->rlimit[i])
1657                                 continue;
1658
1659                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1660                                 *error = EXIT_LIMITS;
1661                                 return -errno;
1662                         }
1663                 }
1664
1665                 if (context->capability_bounding_set_drop) {
1666                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1667                         if (err < 0) {
1668                                 *error = EXIT_CAPABILITIES;
1669                                 return err;
1670                         }
1671                 }
1672
1673 #ifdef HAVE_SMACK
1674                 if (context->smack_process_label) {
1675                         err = mac_smack_apply_pid(0, context->smack_process_label);
1676                         if (err < 0) {
1677                                 *error = EXIT_SMACK_PROCESS_LABEL;
1678                                 return err;
1679                         }
1680                 }
1681 #endif
1682
1683                 if (context->user) {
1684                         err = enforce_user(context, uid);
1685                         if (err < 0) {
1686                                 *error = EXIT_USER;
1687                                 return err;
1688                         }
1689                 }
1690
1691                 /* PR_GET_SECUREBITS is not privileged, while
1692                  * PR_SET_SECUREBITS is. So to suppress
1693                  * potential EPERMs we'll try not to call
1694                  * PR_SET_SECUREBITS unless necessary. */
1695                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1696                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1697                                 *error = EXIT_SECUREBITS;
1698                                 return -errno;
1699                         }
1700
1701                 if (context->capabilities)
1702                         if (cap_set_proc(context->capabilities) < 0) {
1703                                 *error = EXIT_CAPABILITIES;
1704                                 return -errno;
1705                         }
1706
1707                 if (context->no_new_privileges)
1708                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1709                                 *error = EXIT_NO_NEW_PRIVILEGES;
1710                                 return -errno;
1711                         }
1712
1713 #ifdef HAVE_SECCOMP
1714                 if (context->address_families_whitelist ||
1715                     !set_isempty(context->address_families)) {
1716                         err = apply_address_families(context);
1717                         if (err < 0) {
1718                                 *error = EXIT_ADDRESS_FAMILIES;
1719                                 return err;
1720                         }
1721                 }
1722
1723                 if (context->syscall_whitelist ||
1724                     !set_isempty(context->syscall_filter) ||
1725                     !set_isempty(context->syscall_archs)) {
1726                         err = apply_seccomp(context);
1727                         if (err < 0) {
1728                                 *error = EXIT_SECCOMP;
1729                                 return err;
1730                         }
1731                 }
1732 #endif
1733
1734 #ifdef HAVE_SELINUX
1735                 if (mac_selinux_use()) {
1736                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1737
1738                         if (exec_context) {
1739                                 err = setexeccon(exec_context);
1740                                 if (err < 0) {
1741                                         *error = EXIT_SELINUX_CONTEXT;
1742                                         return err;
1743                                 }
1744                         }
1745                 }
1746 #endif
1747
1748 #ifdef HAVE_APPARMOR
1749                 if (context->apparmor_profile && mac_apparmor_use()) {
1750                         err = aa_change_onexec(context->apparmor_profile);
1751                         if (err < 0 && !context->apparmor_profile_ignore) {
1752                                 *error = EXIT_APPARMOR_PROFILE;
1753                                 return -errno;
1754                         }
1755                 }
1756 #endif
1757         }
1758
1759         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1760         if (err < 0) {
1761                 *error = EXIT_MEMORY;
1762                 return err;
1763         }
1764
1765         final_env = strv_env_merge(5,
1766                                    params->environment,
1767                                    our_env,
1768                                    context->environment,
1769                                    files_env,
1770                                    pam_env,
1771                                    NULL);
1772         if (!final_env) {
1773                 *error = EXIT_MEMORY;
1774                 return -ENOMEM;
1775         }
1776
1777         final_argv = replace_env_argv(argv, final_env);
1778         if (!final_argv) {
1779                 *error = EXIT_MEMORY;
1780                 return -ENOMEM;
1781         }
1782
1783         final_env = strv_env_clean(final_env);
1784
1785         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1786                 _cleanup_free_ char *line;
1787
1788                 line = exec_command_line(final_argv);
1789                 if (line) {
1790                         log_open();
1791                         log_unit_struct(params->unit_id,
1792                                         LOG_DEBUG,
1793                                         "EXECUTABLE=%s", command->path,
1794                                         LOG_MESSAGE("Executing: %s", line),
1795                                         NULL);
1796                         log_close();
1797                 }
1798         }
1799         execve(command->path, final_argv, final_env);
1800         *error = EXIT_EXEC;
1801         return -errno;
1802 }
1803
1804 int exec_spawn(ExecCommand *command,
1805                const ExecContext *context,
1806                const ExecParameters *params,
1807                ExecRuntime *runtime,
1808                pid_t *ret) {
1809
1810         _cleanup_strv_free_ char **files_env = NULL;
1811         int *fds = NULL; unsigned n_fds = 0;
1812         char *line, **argv;
1813         int socket_fd;
1814         pid_t pid;
1815         int err;
1816
1817         assert(command);
1818         assert(context);
1819         assert(ret);
1820         assert(params);
1821         assert(params->fds || params->n_fds <= 0);
1822
1823         if (context->std_input == EXEC_INPUT_SOCKET ||
1824             context->std_output == EXEC_OUTPUT_SOCKET ||
1825             context->std_error == EXEC_OUTPUT_SOCKET) {
1826
1827                 if (params->n_fds != 1)
1828                         return -EINVAL;
1829
1830                 socket_fd = params->fds[0];
1831         } else {
1832                 socket_fd = -1;
1833                 fds = params->fds;
1834                 n_fds = params->n_fds;
1835         }
1836
1837         err = exec_context_load_environment(context, params->unit_id, &files_env);
1838         if (err < 0) {
1839                 log_unit_struct(params->unit_id,
1840                                 LOG_ERR,
1841                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1842                                 LOG_ERRNO(-err),
1843                                 NULL);
1844                 return err;
1845         }
1846
1847         argv = params->argv ?: command->argv;
1848
1849         line = exec_command_line(argv);
1850         if (!line)
1851                 return log_oom();
1852
1853         log_unit_struct(params->unit_id,
1854                         LOG_DEBUG,
1855                         "EXECUTABLE=%s", command->path,
1856                         LOG_MESSAGE("About to execute: %s", line),
1857                         NULL);
1858         free(line);
1859
1860         pid = fork();
1861         if (pid < 0)
1862                 return -errno;
1863
1864         if (pid == 0) {
1865                 int r;
1866
1867                 err = exec_child(command,
1868                                  context,
1869                                  params,
1870                                  runtime,
1871                                  argv,
1872                                  socket_fd,
1873                                  fds, n_fds,
1874                                  files_env,
1875                                  &r);
1876                 if (r != 0) {
1877                         log_open();
1878                         log_struct(LOG_ERR,
1879                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1880                                    "EXECUTABLE=%s", command->path,
1881                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1882                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1883                                                command->path, strerror(-err)),
1884                                    LOG_ERRNO(-err),
1885                                    NULL);
1886                         log_close();
1887                 }
1888
1889                 _exit(r);
1890         }
1891
1892         log_unit_struct(params->unit_id,
1893                         LOG_DEBUG,
1894                         LOG_MESSAGE("Forked %s as "PID_FMT,
1895                                     command->path, pid),
1896                         NULL);
1897
1898         /* We add the new process to the cgroup both in the child (so
1899          * that we can be sure that no user code is ever executed
1900          * outside of the cgroup) and in the parent (so that we can be
1901          * sure that when we kill the cgroup the process will be
1902          * killed too). */
1903         if (params->cgroup_path)
1904                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1905
1906         exec_status_start(&command->exec_status, pid);
1907
1908         *ret = pid;
1909         return 0;
1910 }
1911
1912 void exec_context_init(ExecContext *c) {
1913         assert(c);
1914
1915         c->umask = 0022;
1916         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1917         c->cpu_sched_policy = SCHED_OTHER;
1918         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1919         c->syslog_level_prefix = true;
1920         c->ignore_sigpipe = true;
1921         c->timer_slack_nsec = NSEC_INFINITY;
1922         c->personality = 0xffffffffUL;
1923         c->runtime_directory_mode = 0755;
1924 }
1925
1926 void exec_context_done(ExecContext *c) {
1927         unsigned l;
1928
1929         assert(c);
1930
1931         strv_free(c->environment);
1932         c->environment = NULL;
1933
1934         strv_free(c->environment_files);
1935         c->environment_files = NULL;
1936
1937         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1938                 free(c->rlimit[l]);
1939                 c->rlimit[l] = NULL;
1940         }
1941
1942         free(c->working_directory);
1943         c->working_directory = NULL;
1944         free(c->root_directory);
1945         c->root_directory = NULL;
1946
1947         free(c->tty_path);
1948         c->tty_path = NULL;
1949
1950         free(c->syslog_identifier);
1951         c->syslog_identifier = NULL;
1952
1953         free(c->user);
1954         c->user = NULL;
1955
1956         free(c->group);
1957         c->group = NULL;
1958
1959         strv_free(c->supplementary_groups);
1960         c->supplementary_groups = NULL;
1961
1962         free(c->pam_name);
1963         c->pam_name = NULL;
1964
1965         if (c->capabilities) {
1966                 cap_free(c->capabilities);
1967                 c->capabilities = NULL;
1968         }
1969
1970         strv_free(c->read_only_dirs);
1971         c->read_only_dirs = NULL;
1972
1973         strv_free(c->read_write_dirs);
1974         c->read_write_dirs = NULL;
1975
1976         strv_free(c->inaccessible_dirs);
1977         c->inaccessible_dirs = NULL;
1978
1979         if (c->cpuset)
1980                 CPU_FREE(c->cpuset);
1981
1982         free(c->utmp_id);
1983         c->utmp_id = NULL;
1984
1985         free(c->selinux_context);
1986         c->selinux_context = NULL;
1987
1988         free(c->apparmor_profile);
1989         c->apparmor_profile = NULL;
1990
1991         set_free(c->syscall_filter);
1992         c->syscall_filter = NULL;
1993
1994         set_free(c->syscall_archs);
1995         c->syscall_archs = NULL;
1996
1997         set_free(c->address_families);
1998         c->address_families = NULL;
1999
2000         strv_free(c->runtime_directory);
2001         c->runtime_directory = NULL;
2002
2003         bus_endpoint_free(c->bus_endpoint);
2004         c->bus_endpoint = NULL;
2005 }
2006
2007 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2008         char **i;
2009
2010         assert(c);
2011
2012         if (!runtime_prefix)
2013                 return 0;
2014
2015         STRV_FOREACH(i, c->runtime_directory) {
2016                 _cleanup_free_ char *p;
2017
2018                 p = strjoin(runtime_prefix, "/", *i, NULL);
2019                 if (!p)
2020                         return -ENOMEM;
2021
2022                 /* We execute this synchronously, since we need to be
2023                  * sure this is gone when we start the service
2024                  * next. */
2025                 rm_rf(p, false, true, false);
2026         }
2027
2028         return 0;
2029 }
2030
2031 void exec_command_done(ExecCommand *c) {
2032         assert(c);
2033
2034         free(c->path);
2035         c->path = NULL;
2036
2037         strv_free(c->argv);
2038         c->argv = NULL;
2039 }
2040
2041 void exec_command_done_array(ExecCommand *c, unsigned n) {
2042         unsigned i;
2043
2044         for (i = 0; i < n; i++)
2045                 exec_command_done(c+i);
2046 }
2047
2048 ExecCommand* exec_command_free_list(ExecCommand *c) {
2049         ExecCommand *i;
2050
2051         while ((i = c)) {
2052                 LIST_REMOVE(command, c, i);
2053                 exec_command_done(i);
2054                 free(i);
2055         }
2056
2057         return NULL;
2058 }
2059
2060 void exec_command_free_array(ExecCommand **c, unsigned n) {
2061         unsigned i;
2062
2063         for (i = 0; i < n; i++)
2064                 c[i] = exec_command_free_list(c[i]);
2065 }
2066
/* Context handed to invalid_env() as userdata when environment
 * files are cleaned up. */
typedef struct InvalidEnvInfo InvalidEnvInfo;

struct InvalidEnvInfo {
        const char *unit_id; /* unit the warning is logged for */
        const char *path;    /* shown in the warning next to the bad assignment */
};
2071
2072 static void invalid_env(const char *p, void *userdata) {
2073         InvalidEnvInfo *info = userdata;
2074
2075         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2076 }
2077
2078 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2079         char **i, **r = NULL;
2080
2081         assert(c);
2082         assert(l);
2083
2084         STRV_FOREACH(i, c->environment_files) {
2085                 char *fn;
2086                 int k;
2087                 bool ignore = false;
2088                 char **p;
2089                 _cleanup_globfree_ glob_t pglob = {};
2090                 int count, n;
2091
2092                 fn = *i;
2093
2094                 if (fn[0] == '-') {
2095                         ignore = true;
2096                         fn ++;
2097                 }
2098
2099                 if (!path_is_absolute(fn)) {
2100                         if (ignore)
2101                                 continue;
2102
2103                         strv_free(r);
2104                         return -EINVAL;
2105                 }
2106
2107                 /* Filename supports globbing, take all matching files */
2108                 errno = 0;
2109                 if (glob(fn, 0, NULL, &pglob) != 0) {
2110                         if (ignore)
2111                                 continue;
2112
2113                         strv_free(r);
2114                         return errno ? -errno : -EINVAL;
2115                 }
2116                 count = pglob.gl_pathc;
2117                 if (count == 0) {
2118                         if (ignore)
2119                                 continue;
2120
2121                         strv_free(r);
2122                         return -EINVAL;
2123                 }
2124                 for (n = 0; n < count; n++) {
2125                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2126                         if (k < 0) {
2127                                 if (ignore)
2128                                         continue;
2129
2130                                 strv_free(r);
2131                                 return k;
2132                         }
2133                         /* Log invalid environment variables with filename */
2134                         if (p) {
2135                                 InvalidEnvInfo info = {
2136                                         .unit_id = unit_id,
2137                                         .path = pglob.gl_pathv[n]
2138                                 };
2139
2140                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2141                         }
2142
2143                         if (r == NULL)
2144                                 r = p;
2145                         else {
2146                                 char **m;
2147
2148                                 m = strv_env_merge(2, r, p);
2149                                 strv_free(r);
2150                                 strv_free(p);
2151                                 if (!m)
2152                                         return -ENOMEM;
2153
2154                                 r = m;
2155                         }
2156                 }
2157         }
2158
2159         *l = r;
2160
2161         return 0;
2162 }
2163
2164 static bool tty_may_match_dev_console(const char *tty) {
2165         _cleanup_free_ char *active = NULL;
2166        char *console;
2167
2168         if (startswith(tty, "/dev/"))
2169                 tty += 5;
2170
2171         /* trivial identity? */
2172         if (streq(tty, "console"))
2173                 return true;
2174
2175         console = resolve_dev_console(&active);
2176         /* if we could not resolve, assume it may */
2177         if (!console)
2178                 return true;
2179
2180         /* "tty0" means the active VC, so it may be the same sometimes */
2181         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2182 }
2183
2184 bool exec_context_may_touch_console(ExecContext *ec) {
2185         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2186                 is_terminal_input(ec->std_input) ||
2187                 is_terminal_output(ec->std_output) ||
2188                 is_terminal_output(ec->std_error)) &&
2189                tty_may_match_dev_console(tty_path(ec));
2190 }
2191
2192 static void strv_fprintf(FILE *f, char **l) {
2193         char **g;
2194
2195         assert(f);
2196
2197         STRV_FOREACH(g, l)
2198                 fprintf(f, " %s", *g);
2199 }
2200
2201 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2202         char **e;
2203         unsigned i;
2204
2205         assert(c);
2206         assert(f);
2207
2208         prefix = strempty(prefix);
2209
2210         fprintf(f,
2211                 "%sUMask: %04o\n"
2212                 "%sWorkingDirectory: %s\n"
2213                 "%sRootDirectory: %s\n"
2214                 "%sNonBlocking: %s\n"
2215                 "%sPrivateTmp: %s\n"
2216                 "%sPrivateNetwork: %s\n"
2217                 "%sPrivateDevices: %s\n"
2218                 "%sProtectHome: %s\n"
2219                 "%sProtectSystem: %s\n"
2220                 "%sIgnoreSIGPIPE: %s\n",
2221                 prefix, c->umask,
2222                 prefix, c->working_directory ? c->working_directory : "/",
2223                 prefix, c->root_directory ? c->root_directory : "/",
2224                 prefix, yes_no(c->non_blocking),
2225                 prefix, yes_no(c->private_tmp),
2226                 prefix, yes_no(c->private_network),
2227                 prefix, yes_no(c->private_devices),
2228                 prefix, protect_home_to_string(c->protect_home),
2229                 prefix, protect_system_to_string(c->protect_system),
2230                 prefix, yes_no(c->ignore_sigpipe));
2231
2232         STRV_FOREACH(e, c->environment)
2233                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2234
2235         STRV_FOREACH(e, c->environment_files)
2236                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2237
2238         if (c->nice_set)
2239                 fprintf(f,
2240                         "%sNice: %i\n",
2241                         prefix, c->nice);
2242
2243         if (c->oom_score_adjust_set)
2244                 fprintf(f,
2245                         "%sOOMScoreAdjust: %i\n",
2246                         prefix, c->oom_score_adjust);
2247
2248         for (i = 0; i < RLIM_NLIMITS; i++)
2249                 if (c->rlimit[i])
2250                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2251                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2252
2253         if (c->ioprio_set) {
2254                 _cleanup_free_ char *class_str = NULL;
2255
2256                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2257                 fprintf(f,
2258                         "%sIOSchedulingClass: %s\n"
2259                         "%sIOPriority: %i\n",
2260                         prefix, strna(class_str),
2261                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2262         }
2263
2264         if (c->cpu_sched_set) {
2265                 _cleanup_free_ char *policy_str = NULL;
2266
2267                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2268                 fprintf(f,
2269                         "%sCPUSchedulingPolicy: %s\n"
2270                         "%sCPUSchedulingPriority: %i\n"
2271                         "%sCPUSchedulingResetOnFork: %s\n",
2272                         prefix, strna(policy_str),
2273                         prefix, c->cpu_sched_priority,
2274                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2275         }
2276
2277         if (c->cpuset) {
2278                 fprintf(f, "%sCPUAffinity:", prefix);
2279                 for (i = 0; i < c->cpuset_ncpus; i++)
2280                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2281                                 fprintf(f, " %u", i);
2282                 fputs("\n", f);
2283         }
2284
2285         if (c->timer_slack_nsec != NSEC_INFINITY)
2286                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2287
2288         fprintf(f,
2289                 "%sStandardInput: %s\n"
2290                 "%sStandardOutput: %s\n"
2291                 "%sStandardError: %s\n",
2292                 prefix, exec_input_to_string(c->std_input),
2293                 prefix, exec_output_to_string(c->std_output),
2294                 prefix, exec_output_to_string(c->std_error));
2295
2296         if (c->tty_path)
2297                 fprintf(f,
2298                         "%sTTYPath: %s\n"
2299                         "%sTTYReset: %s\n"
2300                         "%sTTYVHangup: %s\n"
2301                         "%sTTYVTDisallocate: %s\n",
2302                         prefix, c->tty_path,
2303                         prefix, yes_no(c->tty_reset),
2304                         prefix, yes_no(c->tty_vhangup),
2305                         prefix, yes_no(c->tty_vt_disallocate));
2306
2307         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2308             c->std_output == EXEC_OUTPUT_KMSG ||
2309             c->std_output == EXEC_OUTPUT_JOURNAL ||
2310             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2311             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2312             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2313             c->std_error == EXEC_OUTPUT_SYSLOG ||
2314             c->std_error == EXEC_OUTPUT_KMSG ||
2315             c->std_error == EXEC_OUTPUT_JOURNAL ||
2316             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2317             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2318             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2319
2320                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2321
2322                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2323                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2324
2325                 fprintf(f,
2326                         "%sSyslogFacility: %s\n"
2327                         "%sSyslogLevel: %s\n",
2328                         prefix, strna(fac_str),
2329                         prefix, strna(lvl_str));
2330         }
2331
2332         if (c->capabilities) {
2333                 _cleanup_cap_free_charp_ char *t;
2334
2335                 t = cap_to_text(c->capabilities, NULL);
2336                 if (t)
2337                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2338         }
2339
2340         if (c->secure_bits)
2341                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2342                         prefix,
2343                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2344                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2345                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2346                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2347                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2348                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2349
2350         if (c->capability_bounding_set_drop) {
2351                 unsigned long l;
2352                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2353
2354                 for (l = 0; l <= cap_last_cap(); l++)
2355                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2356                                 fprintf(f, " %s", strna(capability_to_name(l)));
2357
2358                 fputs("\n", f);
2359         }
2360
2361         if (c->user)
2362                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2363         if (c->group)
2364                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2365
2366         if (strv_length(c->supplementary_groups) > 0) {
2367                 fprintf(f, "%sSupplementaryGroups:", prefix);
2368                 strv_fprintf(f, c->supplementary_groups);
2369                 fputs("\n", f);
2370         }
2371
2372         if (c->pam_name)
2373                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2374
2375         if (strv_length(c->read_write_dirs) > 0) {
2376                 fprintf(f, "%sReadWriteDirs:", prefix);
2377                 strv_fprintf(f, c->read_write_dirs);
2378                 fputs("\n", f);
2379         }
2380
2381         if (strv_length(c->read_only_dirs) > 0) {
2382                 fprintf(f, "%sReadOnlyDirs:", prefix);
2383                 strv_fprintf(f, c->read_only_dirs);
2384                 fputs("\n", f);
2385         }
2386
2387         if (strv_length(c->inaccessible_dirs) > 0) {
2388                 fprintf(f, "%sInaccessibleDirs:", prefix);
2389                 strv_fprintf(f, c->inaccessible_dirs);
2390                 fputs("\n", f);
2391         }
2392
2393         if (c->utmp_id)
2394                 fprintf(f,
2395                         "%sUtmpIdentifier: %s\n",
2396                         prefix, c->utmp_id);
2397
2398         if (c->selinux_context)
2399                 fprintf(f,
2400                         "%sSELinuxContext: %s%s\n",
2401                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2402
2403         if (c->personality != 0xffffffffUL)
2404                 fprintf(f,
2405                         "%sPersonality: %s\n",
2406                         prefix, strna(personality_to_string(c->personality)));
2407
2408         if (c->syscall_filter) {
2409 #ifdef HAVE_SECCOMP
2410                 Iterator j;
2411                 void *id;
2412                 bool first = true;
2413 #endif
2414
2415                 fprintf(f,
2416                         "%sSystemCallFilter: ",
2417                         prefix);
2418
2419                 if (!c->syscall_whitelist)
2420                         fputc('~', f);
2421
2422 #ifdef HAVE_SECCOMP
2423                 SET_FOREACH(id, c->syscall_filter, j) {
2424                         _cleanup_free_ char *name = NULL;
2425
2426                         if (first)
2427                                 first = false;
2428                         else
2429                                 fputc(' ', f);
2430
2431                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2432                         fputs(strna(name), f);
2433                 }
2434 #endif
2435
2436                 fputc('\n', f);
2437         }
2438
2439         if (c->syscall_archs) {
2440 #ifdef HAVE_SECCOMP
2441                 Iterator j;
2442                 void *id;
2443 #endif
2444
2445                 fprintf(f,
2446                         "%sSystemCallArchitectures:",
2447                         prefix);
2448
2449 #ifdef HAVE_SECCOMP
2450                 SET_FOREACH(id, c->syscall_archs, j)
2451                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2452 #endif
2453                 fputc('\n', f);
2454         }
2455
2456         if (c->syscall_errno != 0)
2457                 fprintf(f,
2458                         "%sSystemCallErrorNumber: %s\n",
2459                         prefix, strna(errno_to_name(c->syscall_errno)));
2460
2461         if (c->apparmor_profile)
2462                 fprintf(f,
2463                         "%sAppArmorProfile: %s%s\n",
2464                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2465 }
2466
2467 bool exec_context_maintains_privileges(ExecContext *c) {
2468         assert(c);
2469
2470         /* Returns true if the process forked off would run run under
2471          * an unchanged UID or as root. */
2472
2473         if (!c->user)
2474                 return true;
2475
2476         if (streq(c->user, "root") || streq(c->user, "0"))
2477                 return true;
2478
2479         return false;
2480 }
2481
2482 void exec_status_start(ExecStatus *s, pid_t pid) {
2483         assert(s);
2484
2485         zero(*s);
2486         s->pid = pid;
2487         dual_timestamp_get(&s->start_timestamp);
2488 }
2489
2490 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2491         assert(s);
2492
2493         if (s->pid && s->pid != pid)
2494                 zero(*s);
2495
2496         s->pid = pid;
2497         dual_timestamp_get(&s->exit_timestamp);
2498
2499         s->code = code;
2500         s->status = status;
2501
2502         if (context) {
2503                 if (context->utmp_id)
2504                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2505
2506                 exec_context_tty_reset(context);
2507         }
2508 }
2509
2510 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2511         char buf[FORMAT_TIMESTAMP_MAX];
2512
2513         assert(s);
2514         assert(f);
2515
2516         if (s->pid <= 0)
2517                 return;
2518
2519         prefix = strempty(prefix);
2520
2521         fprintf(f,
2522                 "%sPID: "PID_FMT"\n",
2523                 prefix, s->pid);
2524
2525         if (s->start_timestamp.realtime > 0)
2526                 fprintf(f,
2527                         "%sStart Timestamp: %s\n",
2528                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2529
2530         if (s->exit_timestamp.realtime > 0)
2531                 fprintf(f,
2532                         "%sExit Timestamp: %s\n"
2533                         "%sExit Code: %s\n"
2534                         "%sExit Status: %i\n",
2535                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2536                         prefix, sigchld_code_to_string(s->code),
2537                         prefix, s->status);
2538 }
2539
2540 char *exec_command_line(char **argv) {
2541         size_t k;
2542         char *n, *p, **a;
2543         bool first = true;
2544
2545         assert(argv);
2546
2547         k = 1;
2548         STRV_FOREACH(a, argv)
2549                 k += strlen(*a)+3;
2550
2551         if (!(n = new(char, k)))
2552                 return NULL;
2553
2554         p = n;
2555         STRV_FOREACH(a, argv) {
2556
2557                 if (!first)
2558                         *(p++) = ' ';
2559                 else
2560                         first = false;
2561
2562                 if (strpbrk(*a, WHITESPACE)) {
2563                         *(p++) = '\'';
2564                         p = stpcpy(p, *a);
2565                         *(p++) = '\'';
2566                 } else
2567                         p = stpcpy(p, *a);
2568
2569         }
2570
2571         *p = 0;
2572
2573         /* FIXME: this doesn't really handle arguments that have
2574          * spaces and ticks in them */
2575
2576         return n;
2577 }
2578
2579 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2580         _cleanup_free_ char *cmd = NULL;
2581         const char *prefix2;
2582
2583         assert(c);
2584         assert(f);
2585
2586         prefix = strempty(prefix);
2587         prefix2 = strappenda(prefix, "\t");
2588
2589         cmd = exec_command_line(c->argv);
2590         fprintf(f,
2591                 "%sCommand Line: %s\n",
2592                 prefix, cmd ? cmd : strerror(ENOMEM));
2593
2594         exec_status_dump(&c->exec_status, f, prefix2);
2595 }
2596
2597 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2598         assert(f);
2599
2600         prefix = strempty(prefix);
2601
2602         LIST_FOREACH(command, c, c)
2603                 exec_command_dump(c, f, prefix);
2604 }
2605
2606 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2607         ExecCommand *end;
2608
2609         assert(l);
2610         assert(e);
2611
2612         if (*l) {
2613                 /* It's kind of important, that we keep the order here */
2614                 LIST_FIND_TAIL(command, *l, end);
2615                 LIST_INSERT_AFTER(command, *l, end, e);
2616         } else
2617               *l = e;
2618 }
2619
2620 int exec_command_set(ExecCommand *c, const char *path, ...) {
2621         va_list ap;
2622         char **l, *p;
2623
2624         assert(c);
2625         assert(path);
2626
2627         va_start(ap, path);
2628         l = strv_new_ap(path, ap);
2629         va_end(ap);
2630
2631         if (!l)
2632                 return -ENOMEM;
2633
2634         p = strdup(path);
2635         if (!p) {
2636                 strv_free(l);
2637                 return -ENOMEM;
2638         }
2639
2640         free(c->path);
2641         c->path = p;
2642
2643         strv_free(c->argv);
2644         c->argv = l;
2645
2646         return 0;
2647 }
2648
2649 int exec_command_append(ExecCommand *c, const char *path, ...) {
2650         _cleanup_strv_free_ char **l = NULL;
2651         va_list ap;
2652         int r;
2653
2654         assert(c);
2655         assert(path);
2656
2657         va_start(ap, path);
2658         l = strv_new_ap(path, ap);
2659         va_end(ap);
2660
2661         if (!l)
2662                 return -ENOMEM;
2663
2664         r = strv_extend_strv(&c->argv, l);
2665         if (r < 0)
2666                 return r;
2667
2668         return 0;
2669 }
2670
2671
2672 static int exec_runtime_allocate(ExecRuntime **rt) {
2673
2674         if (*rt)
2675                 return 0;
2676
2677         *rt = new0(ExecRuntime, 1);
2678         if (!*rt)
2679                 return -ENOMEM;
2680
2681         (*rt)->n_ref = 1;
2682         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2683
2684         return 0;
2685 }
2686
2687 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2688         int r;
2689
2690         assert(rt);
2691         assert(c);
2692         assert(id);
2693
2694         if (*rt)
2695                 return 1;
2696
2697         if (!c->private_network && !c->private_tmp)
2698                 return 0;
2699
2700         r = exec_runtime_allocate(rt);
2701         if (r < 0)
2702                 return r;
2703
2704         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2705                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2706                         return -errno;
2707         }
2708
2709         if (c->private_tmp && !(*rt)->tmp_dir) {
2710                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2711                 if (r < 0)
2712                         return r;
2713         }
2714
2715         return 1;
2716 }
2717
2718 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2719         assert(r);
2720         assert(r->n_ref > 0);
2721
2722         r->n_ref++;
2723         return r;
2724 }
2725
2726 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2727
2728         if (!r)
2729                 return NULL;
2730
2731         assert(r->n_ref > 0);
2732
2733         r->n_ref--;
2734         if (r->n_ref <= 0) {
2735                 free(r->tmp_dir);
2736                 free(r->var_tmp_dir);
2737                 safe_close_pair(r->netns_storage_socket);
2738                 free(r);
2739         }
2740
2741         return NULL;
2742 }
2743
2744 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2745         assert(u);
2746         assert(f);
2747         assert(fds);
2748
2749         if (!rt)
2750                 return 0;
2751
2752         if (rt->tmp_dir)
2753                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2754
2755         if (rt->var_tmp_dir)
2756                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2757
2758         if (rt->netns_storage_socket[0] >= 0) {
2759                 int copy;
2760
2761                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2762                 if (copy < 0)
2763                         return copy;
2764
2765                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2766         }
2767
2768         if (rt->netns_storage_socket[1] >= 0) {
2769                 int copy;
2770
2771                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2772                 if (copy < 0)
2773                         return copy;
2774
2775                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2776         }
2777
2778         return 0;
2779 }
2780
2781 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2782         int r;
2783
2784         assert(rt);
2785         assert(key);
2786         assert(value);
2787
2788         if (streq(key, "tmp-dir")) {
2789                 char *copy;
2790
2791                 r = exec_runtime_allocate(rt);
2792                 if (r < 0)
2793                         return r;
2794
2795                 copy = strdup(value);
2796                 if (!copy)
2797                         return log_oom();
2798
2799                 free((*rt)->tmp_dir);
2800                 (*rt)->tmp_dir = copy;
2801
2802         } else if (streq(key, "var-tmp-dir")) {
2803                 char *copy;
2804
2805                 r = exec_runtime_allocate(rt);
2806                 if (r < 0)
2807                         return r;
2808
2809                 copy = strdup(value);
2810                 if (!copy)
2811                         return log_oom();
2812
2813                 free((*rt)->var_tmp_dir);
2814                 (*rt)->var_tmp_dir = copy;
2815
2816         } else if (streq(key, "netns-socket-0")) {
2817                 int fd;
2818
2819                 r = exec_runtime_allocate(rt);
2820                 if (r < 0)
2821                         return r;
2822
2823                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2824                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2825                 else {
2826                         safe_close((*rt)->netns_storage_socket[0]);
2827                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2828                 }
2829         } else if (streq(key, "netns-socket-1")) {
2830                 int fd;
2831
2832                 r = exec_runtime_allocate(rt);
2833                 if (r < 0)
2834                         return r;
2835
2836                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2837                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2838                 else {
2839                         safe_close((*rt)->netns_storage_socket[1]);
2840                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2841                 }
2842         } else
2843                 return 0;
2844
2845         return 1;
2846 }
2847
/* Thread entry point: removes the directory tree named by the string p
 * via rm_rf_dangerous() and then frees the string, which this function
 * takes ownership of. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2854
2855 void exec_runtime_destroy(ExecRuntime *rt) {
2856         int r;
2857
2858         if (!rt)
2859                 return;
2860
2861         /* If there are multiple users of this, let's leave the stuff around */
2862         if (rt->n_ref > 1)
2863                 return;
2864
2865         if (rt->tmp_dir) {
2866                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2867
2868                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2869                 if (r < 0) {
2870                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2871                         free(rt->tmp_dir);
2872                 }
2873
2874                 rt->tmp_dir = NULL;
2875         }
2876
2877         if (rt->var_tmp_dir) {
2878                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2879
2880                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2881                 if (r < 0) {
2882                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2883                         free(rt->var_tmp_dir);
2884                 }
2885
2886                 rt->var_tmp_dir = NULL;
2887         }
2888
2889         safe_close_pair(rt->netns_storage_socket);
2890 }
2891
2892 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2893         [EXEC_INPUT_NULL] = "null",
2894         [EXEC_INPUT_TTY] = "tty",
2895         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2896         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2897         [EXEC_INPUT_SOCKET] = "socket"
2898 };
2899
2900 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2901
2902 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2903         [EXEC_OUTPUT_INHERIT] = "inherit",
2904         [EXEC_OUTPUT_NULL] = "null",
2905         [EXEC_OUTPUT_TTY] = "tty",
2906         [EXEC_OUTPUT_SYSLOG] = "syslog",
2907         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2908         [EXEC_OUTPUT_KMSG] = "kmsg",
2909         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2910         [EXEC_OUTPUT_JOURNAL] = "journal",
2911         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2912         [EXEC_OUTPUT_SOCKET] = "socket"
2913 };
2914
2915 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);