chiark / gitweb /
core: check both EPERM and EACCES for OOM adjustments
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
88 #include "label.h"
89 #include "cap-list.h"
90
91 #ifdef HAVE_SECCOMP
92 #include "seccomp-util.h"
93 #endif
94
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97
98 /* This assumes there is a 'tty' group */
99 #define TTY_MODE 0620
100
101 #define SNDBUF_SIZE (8*1024*1024)
102
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3. The array is updated in place with the new
 * descriptor numbers. Returns 0 on success or a negative errno value
 * if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free descriptor
                         * that is >= wanted */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied: note the
                         * first such index so the next pass starts there */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
146
/* Applies the requested O_NONBLOCK state to every descriptor in fds[]
 * and clears FD_CLOEXEC on each of them. Returns 0 on success, or the
 * first negative error returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        while (k < n_fds) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be passed on to the child */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;

                k++;
        }

        return 0;
}
173
174 _pure_ static const char *tty_path(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_path)
178                 return context->tty_path;
179
180         return "/dev/console";
181 }
182
183 static void exec_context_tty_reset(const ExecContext *context) {
184         assert(context);
185
186         if (context->tty_vhangup)
187                 terminal_vhangup(tty_path(context));
188
189         if (context->tty_reset)
190                 reset_terminal(tty_path(context));
191
192         if (context->tty_vt_disallocate && context->tty_path)
193                 vt_disallocate(context->tty_path);
194 }
195
196 static bool is_terminal_output(ExecOutput o) {
197         return
198                 o == EXEC_OUTPUT_TTY ||
199                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 }
203
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative
 * errno value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: open() already returned the requested number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
221
222 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227         uid_t olduid = UID_INVALID;
228         gid_t oldgid = GID_INVALID;
229         int r;
230
231         if (gid != GID_INVALID) {
232                 oldgid = getgid();
233
234                 r = setegid(gid);
235                 if (r < 0)
236                         return -errno;
237         }
238
239         if (uid != UID_INVALID) {
240                 olduid = getuid();
241
242                 r = seteuid(uid);
243                 if (r < 0) {
244                         r = -errno;
245                         goto restore_gid;
246                 }
247         }
248
249         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
250         if (r < 0)
251                 r = -errno;
252
253         /* If we fail to restore the uid or gid, things will likely
254            fail later on. This should only happen if an LSM interferes. */
255
256         if (uid != UID_INVALID)
257                 (void) seteuid(olduid);
258
259  restore_gid:
260         if (gid != GID_INVALID)
261                 (void) setegid(oldgid);
262
263         return r;
264 }
265
266 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
267         int fd, r;
268
269         assert(context);
270         assert(output < _EXEC_OUTPUT_MAX);
271         assert(ident);
272         assert(nfd >= 0);
273
274         fd = socket(AF_UNIX, SOCK_STREAM, 0);
275         if (fd < 0)
276                 return -errno;
277
278         r = connect_journal_socket(fd, uid, gid);
279         if (r < 0)
280                 return r;
281
282         if (shutdown(fd, SHUT_RD) < 0) {
283                 safe_close(fd);
284                 return -errno;
285         }
286
287         fd_inc_sndbuf(fd, SNDBUF_SIZE);
288
289         dprintf(fd,
290                 "%s\n"
291                 "%s\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n"
295                 "%i\n"
296                 "%i\n",
297                 context->syslog_identifier ? context->syslog_identifier : ident,
298                 unit_id,
299                 context->syslog_priority,
300                 !!context->syslog_level_prefix,
301                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
302                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
303                 is_terminal_output(output));
304
305         if (fd != nfd) {
306                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
307                 safe_close(fd);
308         } else
309                 r = nfd;
310
311         return r;
312 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, or a
 * negative error value. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested number, nothing to duplicate */
        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return result;
}
330
331 static bool is_terminal_input(ExecInput i) {
332         return
333                 i == EXEC_INPUT_TTY ||
334                 i == EXEC_INPUT_TTY_FORCE ||
335                 i == EXEC_INPUT_TTY_FAIL;
336 }
337
338 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
339
340         if (is_terminal_input(std_input) && !apply_tty_stdin)
341                 return EXEC_INPUT_NULL;
342
343         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
344                 return EXEC_INPUT_NULL;
345
346         return std_input;
347 }
348
349 static int fixup_output(ExecOutput std_output, int socket_fd) {
350
351         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
352                 return EXEC_OUTPUT_INHERIT;
353
354         return std_output;
355 }
356
357 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
358         ExecInput i;
359
360         assert(context);
361
362         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
363
364         switch (i) {
365
366         case EXEC_INPUT_NULL:
367                 return open_null_as(O_RDONLY, STDIN_FILENO);
368
369         case EXEC_INPUT_TTY:
370         case EXEC_INPUT_TTY_FORCE:
371         case EXEC_INPUT_TTY_FAIL: {
372                 int fd, r;
373
374                 fd = acquire_terminal(tty_path(context),
375                                       i == EXEC_INPUT_TTY_FAIL,
376                                       i == EXEC_INPUT_TTY_FORCE,
377                                       false,
378                                       USEC_INFINITY);
379                 if (fd < 0)
380                         return fd;
381
382                 if (fd != STDIN_FILENO) {
383                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
384                         safe_close(fd);
385                 } else
386                         r = STDIN_FILENO;
387
388                 return r;
389         }
390
391         case EXEC_INPUT_SOCKET:
392                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
393
394         default:
395                 assert_not_reached("Unknown input type");
396         }
397 }
398
399 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
400         ExecOutput o;
401         ExecInput i;
402         int r;
403
404         assert(context);
405         assert(ident);
406
407         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408         o = fixup_output(context->std_output, socket_fd);
409
410         if (fileno == STDERR_FILENO) {
411                 ExecOutput e;
412                 e = fixup_output(context->std_error, socket_fd);
413
414                 /* This expects the input and output are already set up */
415
416                 /* Don't change the stderr file descriptor if we inherit all
417                  * the way and are not on a tty */
418                 if (e == EXEC_OUTPUT_INHERIT &&
419                     o == EXEC_OUTPUT_INHERIT &&
420                     i == EXEC_INPUT_NULL &&
421                     !is_terminal_input(context->std_input) &&
422                     getppid () != 1)
423                         return fileno;
424
425                 /* Duplicate from stdout if possible */
426                 if (e == o || e == EXEC_OUTPUT_INHERIT)
427                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
428
429                 o = e;
430
431         } else if (o == EXEC_OUTPUT_INHERIT) {
432                 /* If input got downgraded, inherit the original value */
433                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
435
436                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437                 if (i != EXEC_INPUT_NULL)
438                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
439
440                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
441                 if (getppid() != 1)
442                         return fileno;
443
444                 /* We need to open /dev/null here anew, to get the right access mode. */
445                 return open_null_as(O_WRONLY, fileno);
446         }
447
448         switch (o) {
449
450         case EXEC_OUTPUT_NULL:
451                 return open_null_as(O_WRONLY, fileno);
452
453         case EXEC_OUTPUT_TTY:
454                 if (is_terminal_input(i))
455                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
456
457                 /* We don't reset the terminal if this is just about output */
458                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
459
460         case EXEC_OUTPUT_SYSLOG:
461         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462         case EXEC_OUTPUT_KMSG:
463         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464         case EXEC_OUTPUT_JOURNAL:
465         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
467                 if (r < 0) {
468                         log_unit_struct(unit_id,
469                                         LOG_ERR,
470                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
471                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
472                                                     unit_id, strerror(-r)),
473                                         LOG_ERRNO(-r),
474                                         NULL);
475                         r = open_null_as(O_WRONLY, fileno);
476                 }
477                 return r;
478
479         case EXEC_OUTPUT_SOCKET:
480                 assert(socket_fd >= 0);
481                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482
483         default:
484                 assert_not_reached("Unknown error type");
485         }
486 }
487
488 static int chown_terminal(int fd, uid_t uid) {
489         struct stat st;
490
491         assert(fd >= 0);
492
493         /* This might fail. What matters are the results. */
494         (void) fchown(fd, uid, -1);
495         (void) fchmod(fd, TTY_MODE);
496
497         if (fstat(fd, &st) < 0)
498                 return -errno;
499
500         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
501                 return -EPERM;
502
503         return 0;
504 }
505
506 static int setup_confirm_stdio(int *_saved_stdin,
507                                int *_saved_stdout) {
508         int fd = -1, saved_stdin, saved_stdout = -1, r;
509
510         assert(_saved_stdin);
511         assert(_saved_stdout);
512
513         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
514         if (saved_stdin < 0)
515                 return -errno;
516
517         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518         if (saved_stdout < 0) {
519                 r = errno;
520                 goto fail;
521         }
522
523         fd = acquire_terminal(
524                         "/dev/console",
525                         false,
526                         false,
527                         false,
528                         DEFAULT_CONFIRM_USEC);
529         if (fd < 0) {
530                 r = fd;
531                 goto fail;
532         }
533
534         r = chown_terminal(fd, getuid());
535         if (r < 0)
536                 goto fail;
537
538         if (dup2(fd, STDIN_FILENO) < 0) {
539                 r = -errno;
540                 goto fail;
541         }
542
543         if (dup2(fd, STDOUT_FILENO) < 0) {
544                 r = -errno;
545                 goto fail;
546         }
547
548         if (fd >= 2)
549                 safe_close(fd);
550
551         *_saved_stdin = saved_stdin;
552         *_saved_stdout = saved_stdout;
553
554         return 0;
555
556 fail:
557         safe_close(saved_stdout);
558         safe_close(saved_stdin);
559         safe_close(fd);
560
561         return r;
562 }
563
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565         _cleanup_close_ int fd = -1;
566         va_list ap;
567
568         assert(format);
569
570         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
571         if (fd < 0)
572                 return fd;
573
574         va_start(ap, format);
575         vdprintf(fd, format, ap);
576         va_end(ap);
577
578         return 0;
579 }
580
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back onto stdin/stdout (where they are valid)
 * and closes the saved copies. Returns 0 on success or the negative
 * errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* The saved copies are closed regardless of the outcome above */
        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
604
605 static int ask_for_confirmation(char *response, char **argv) {
606         int saved_stdout = -1, saved_stdin = -1, r;
607         _cleanup_free_ char *line = NULL;
608
609         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
610         if (r < 0)
611                 return r;
612
613         line = exec_command_line(argv);
614         if (!line)
615                 return -ENOMEM;
616
617         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
618
619         restore_confirm_stdio(&saved_stdin, &saved_stdout);
620
621         return r;
622 }
623
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625         bool keep_groups = false;
626         int r;
627
628         assert(context);
629
630         /* Lookup and set GID and supplementary group list. Here too
631          * we avoid NSS lookups for gid=0. */
632
633         if (context->group || username) {
634
635                 if (context->group) {
636                         const char *g = context->group;
637
638                         if ((r = get_group_creds(&g, &gid)) < 0)
639                                 return r;
640                 }
641
642                 /* First step, initialize groups from /etc/groups */
643                 if (username && gid != 0) {
644                         if (initgroups(username, gid) < 0)
645                                 return -errno;
646
647                         keep_groups = true;
648                 }
649
650                 /* Second step, set our gids */
651                 if (setresgid(gid, gid, gid) < 0)
652                         return -errno;
653         }
654
655         if (context->supplementary_groups) {
656                 int ngroups_max, k;
657                 gid_t *gids;
658                 char **i;
659
660                 /* Final step, initialize any manually set supplementary groups */
661                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
662
663                 if (!(gids = new(gid_t, ngroups_max)))
664                         return -ENOMEM;
665
666                 if (keep_groups) {
667                         if ((k = getgroups(ngroups_max, gids)) < 0) {
668                                 free(gids);
669                                 return -errno;
670                         }
671                 } else
672                         k = 0;
673
674                 STRV_FOREACH(i, context->supplementary_groups) {
675                         const char *g;
676
677                         if (k >= ngroups_max) {
678                                 free(gids);
679                                 return -E2BIG;
680                         }
681
682                         g = *i;
683                         r = get_group_creds(&g, gids+k);
684                         if (r < 0) {
685                                 free(gids);
686                                 return r;
687                         }
688
689                         k++;
690                 }
691
692                 if (setgroups(k, gids) < 0) {
693                         free(gids);
694                         return -errno;
695                 }
696
697                 free(gids);
698         }
699
700         return 0;
701 }
702
703 static int enforce_user(const ExecContext *context, uid_t uid) {
704         assert(context);
705
706         /* Sets (but doesn't lookup) the uid and make sure we keep the
707          * capabilities while doing so. */
708
709         if (context->capabilities) {
710                 _cleanup_cap_free_ cap_t d = NULL;
711                 static const cap_value_t bits[] = {
712                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
713                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
714                 };
715
716                 /* First step: If we need to keep capabilities but
717                  * drop privileges we need to make sure we keep our
718                  * caps, while we drop privileges. */
719                 if (uid != 0) {
720                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
721
722                         if (prctl(PR_GET_SECUREBITS) != sb)
723                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
724                                         return -errno;
725                 }
726
727                 /* Second step: set the capabilities. This will reduce
728                  * the capabilities to the minimum we need. */
729
730                 d = cap_dup(context->capabilities);
731                 if (!d)
732                         return -errno;
733
734                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736                         return -errno;
737
738                 if (cap_set_proc(d) < 0)
739                         return -errno;
740         }
741
742         /* Third step: actually set the uids */
743         if (setresuid(uid, uid, uid) < 0)
744                 return -errno;
745
746         /* At this point we should have all necessary capabilities but
747            are otherwise a normal user. However, the caps might got
748            corrupted due to the setresuid() so we need clean them up
749            later. This is done outside of this call. */
750
751         return 0;
752 }
753
754 #ifdef HAVE_PAM
755
756 static int null_conv(
757                 int num_msg,
758                 const struct pam_message **msg,
759                 struct pam_response **resp,
760                 void *appdata_ptr) {
761
762         /* We don't support conversations */
763
764         return PAM_CONV_ERR;
765 }
766
767 static int setup_pam(
768                 const char *name,
769                 const char *user,
770                 uid_t uid,
771                 const char *tty,
772                 char ***pam_env,
773                 int fds[], unsigned n_fds) {
774
775         static const struct pam_conv conv = {
776                 .conv = null_conv,
777                 .appdata_ptr = NULL
778         };
779
780         pam_handle_t *handle = NULL;
781         sigset_t ss, old_ss;
782         int pam_code = PAM_SUCCESS;
783         int err;
784         char **e = NULL;
785         bool close_session = false;
786         pid_t pam_pid = 0, parent_pid;
787         int flags = 0;
788
789         assert(name);
790         assert(user);
791         assert(pam_env);
792
793         /* We set up PAM in the parent process, then fork. The child
794          * will then stay around until killed via PR_GET_PDEATHSIG or
795          * systemd via the cgroup logic. It will then remove the PAM
796          * session again. The parent process will exec() the actual
797          * daemon. We do things this way to ensure that the main PID
798          * of the daemon is the one we initially fork()ed. */
799
800         if (log_get_max_level() < LOG_DEBUG)
801                 flags |= PAM_SILENT;
802
803         pam_code = pam_start(name, user, &conv, &handle);
804         if (pam_code != PAM_SUCCESS) {
805                 handle = NULL;
806                 goto fail;
807         }
808
809         if (tty) {
810                 pam_code = pam_set_item(handle, PAM_TTY, tty);
811                 if (pam_code != PAM_SUCCESS)
812                         goto fail;
813         }
814
815         pam_code = pam_acct_mgmt(handle, flags);
816         if (pam_code != PAM_SUCCESS)
817                 goto fail;
818
819         pam_code = pam_open_session(handle, flags);
820         if (pam_code != PAM_SUCCESS)
821                 goto fail;
822
823         close_session = true;
824
825         e = pam_getenvlist(handle);
826         if (!e) {
827                 pam_code = PAM_BUF_ERR;
828                 goto fail;
829         }
830
831         /* Block SIGTERM, so that we know that it won't get lost in
832          * the child */
833         if (sigemptyset(&ss) < 0 ||
834             sigaddset(&ss, SIGTERM) < 0 ||
835             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
836                 goto fail;
837
838         parent_pid = getpid();
839
840         pam_pid = fork();
841         if (pam_pid < 0)
842                 goto fail;
843
844         if (pam_pid == 0) {
845                 int sig;
846                 int r = EXIT_PAM;
847
848                 /* The child's job is to reset the PAM session on
849                  * termination */
850
851                 /* This string must fit in 10 chars (i.e. the length
852                  * of "/sbin/init"), to look pretty in /bin/ps */
853                 rename_process("(sd-pam)");
854
855                 /* Make sure we don't keep open the passed fds in this
856                 child. We assume that otherwise only those fds are
857                 open here that have been opened by PAM. */
858                 close_many(fds, n_fds);
859
860                 /* Drop privileges - we don't need any to pam_close_session
861                  * and this will make PR_SET_PDEATHSIG work in most cases.
862                  * If this fails, ignore the error - but expect sd-pam threads
863                  * to fail to exit normally */
864                 if (setresuid(uid, uid, uid) < 0)
865                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
866
867                 /* Wait until our parent died. This will only work if
868                  * the above setresuid() succeeds, otherwise the kernel
869                  * will not allow unprivileged parents kill their privileged
870                  * children this way. We rely on the control groups kill logic
871                  * to do the rest for us. */
872                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873                         goto child_finish;
874
875                 /* Check if our parent process might already have
876                  * died? */
877                 if (getppid() == parent_pid) {
878                         for (;;) {
879                                 if (sigwait(&ss, &sig) < 0) {
880                                         if (errno == EINTR)
881                                                 continue;
882
883                                         goto child_finish;
884                                 }
885
886                                 assert(sig == SIGTERM);
887                                 break;
888                         }
889                 }
890
891                 /* If our parent died we'll end the session */
892                 if (getppid() != parent_pid) {
893                         pam_code = pam_close_session(handle, flags);
894                         if (pam_code != PAM_SUCCESS)
895                                 goto child_finish;
896                 }
897
898                 r = 0;
899
900         child_finish:
901                 pam_end(handle, pam_code | flags);
902                 _exit(r);
903         }
904
905         /* If the child was forked off successfully it will do all the
906          * cleanups, so forget about the handle here. */
907         handle = NULL;
908
909         /* Unblock SIGTERM again in the parent */
910         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
911                 goto fail;
912
913         /* We close the log explicitly here, since the PAM modules
914          * might have opened it, but we don't want this fd around. */
915         closelog();
916
917         *pam_env = e;
918         e = NULL;
919
920         return 0;
921
922 fail:
923         if (pam_code != PAM_SUCCESS) {
924                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
925                 err = -EPERM;  /* PAM errors do not map to errno */
926         } else {
927                 log_error_errno(errno, "PAM failed: %m");
928                 err = -errno;
929         }
930
931         if (handle) {
932                 if (close_session)
933                         pam_code = pam_close_session(handle, flags);
934
935                 pam_end(handle, pam_code | flags);
936         }
937
938         strv_free(e);
939
940         closelog();
941
942         if (pam_pid > 1) {
943                 kill(pam_pid, SIGTERM);
944                 kill(pam_pid, SIGCONT);
945         }
946
947         return err;
948 }
949 #endif
950
/* Renames the current process to "(<name>)", where <name> is the last
 * path component of path, cut down to its final 8 characters if it is
 * longer than that. If the component is empty, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        char *out = process_name;
        const char *base;
        size_t len;

        /* The result has to fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it tends to be the more telling part,
                 * as the beginning might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        *out++ = '(';
        memcpy(out, base, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
981
982 #ifdef HAVE_SECCOMP
983
984 static int apply_seccomp(const ExecContext *c) {
985         uint32_t negative_action, action;
986         scmp_filter_ctx *seccomp;
987         Iterator i;
988         void *id;
989         int r;
990
991         assert(c);
992
993         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
994
995         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
996         if (!seccomp)
997                 return -ENOMEM;
998
999         if (c->syscall_archs) {
1000
1001                 SET_FOREACH(id, c->syscall_archs, i) {
1002                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1003                         if (r == -EEXIST)
1004                                 continue;
1005                         if (r < 0)
1006                                 goto finish;
1007                 }
1008
1009         } else {
1010                 r = seccomp_add_secondary_archs(seccomp);
1011                 if (r < 0)
1012                         goto finish;
1013         }
1014
1015         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1016         SET_FOREACH(id, c->syscall_filter, i) {
1017                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1018                 if (r < 0)
1019                         goto finish;
1020         }
1021
1022         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1023         if (r < 0)
1024                 goto finish;
1025
1026         r = seccomp_load(seccomp);
1027
1028 finish:
1029         seccomp_release(seccomp);
1030         return r;
1031 }
1032
1033 static int apply_address_families(const ExecContext *c) {
1034         scmp_filter_ctx *seccomp;
1035         Iterator i;
1036         int r;
1037
1038         assert(c);
1039
1040         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1041         if (!seccomp)
1042                 return -ENOMEM;
1043
1044         r = seccomp_add_secondary_archs(seccomp);
1045         if (r < 0)
1046                 goto finish;
1047
1048         if (c->address_families_whitelist) {
1049                 int af, first = 0, last = 0;
1050                 void *afp;
1051
1052                 /* If this is a whitelist, we first block the address
1053                  * families that are out of range and then everything
1054                  * that is not in the set. First, we find the lowest
1055                  * and highest address family in the set. */
1056
1057                 SET_FOREACH(afp, c->address_families, i) {
1058                         af = PTR_TO_INT(afp);
1059
1060                         if (af <= 0 || af >= af_max())
1061                                 continue;
1062
1063                         if (first == 0 || af < first)
1064                                 first = af;
1065
1066                         if (last == 0 || af > last)
1067                                 last = af;
1068                 }
1069
1070                 assert((first == 0) == (last == 0));
1071
1072                 if (first == 0) {
1073
1074                         /* No entries in the valid range, block everything */
1075                         r = seccomp_rule_add(
1076                                         seccomp,
1077                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1078                                         SCMP_SYS(socket),
1079                                         0);
1080                         if (r < 0)
1081                                 goto finish;
1082
1083                 } else {
1084
1085                         /* Block everything below the first entry */
1086                         r = seccomp_rule_add(
1087                                         seccomp,
1088                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1089                                         SCMP_SYS(socket),
1090                                         1,
1091                                         SCMP_A0(SCMP_CMP_LT, first));
1092                         if (r < 0)
1093                                 goto finish;
1094
1095                         /* Block everything above the last entry */
1096                         r = seccomp_rule_add(
1097                                         seccomp,
1098                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1099                                         SCMP_SYS(socket),
1100                                         1,
1101                                         SCMP_A0(SCMP_CMP_GT, last));
1102                         if (r < 0)
1103                                 goto finish;
1104
1105                         /* Block everything between the first and last
1106                          * entry */
1107                         for (af = 1; af < af_max(); af++) {
1108
1109                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1110                                         continue;
1111
1112                                 r = seccomp_rule_add(
1113                                                 seccomp,
1114                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1115                                                 SCMP_SYS(socket),
1116                                                 1,
1117                                                 SCMP_A0(SCMP_CMP_EQ, af));
1118                                 if (r < 0)
1119                                         goto finish;
1120                         }
1121                 }
1122
1123         } else {
1124                 void *af;
1125
1126                 /* If this is a blacklist, then generate one rule for
1127                  * each address family that are then combined in OR
1128                  * checks. */
1129
1130                 SET_FOREACH(af, c->address_families, i) {
1131
1132                         r = seccomp_rule_add(
1133                                         seccomp,
1134                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1135                                         SCMP_SYS(socket),
1136                                         1,
1137                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1138                         if (r < 0)
1139                                 goto finish;
1140                 }
1141         }
1142
1143         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1144         if (r < 0)
1145                 goto finish;
1146
1147         r = seccomp_load(seccomp);
1148
1149 finish:
1150         seccomp_release(seccomp);
1151         return r;
1152 }
1153
1154 #endif
1155
1156 static void do_idle_pipe_dance(int idle_pipe[4]) {
1157         assert(idle_pipe);
1158
1159
1160         safe_close(idle_pipe[1]);
1161         safe_close(idle_pipe[2]);
1162
1163         if (idle_pipe[0] >= 0) {
1164                 int r;
1165
1166                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1167
1168                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1169                         /* Signal systemd that we are bored and want to continue. */
1170                         write(idle_pipe[3], "x", 1);
1171
1172                         /* Wait for systemd to react to the signal above. */
1173                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1174                 }
1175
1176                 safe_close(idle_pipe[0]);
1177
1178         }
1179
1180         safe_close(idle_pipe[3]);
1181 }
1182
1183 static int build_environment(
1184                 const ExecContext *c,
1185                 unsigned n_fds,
1186                 usec_t watchdog_usec,
1187                 const char *home,
1188                 const char *username,
1189                 const char *shell,
1190                 char ***ret) {
1191
1192         _cleanup_strv_free_ char **our_env = NULL;
1193         unsigned n_env = 0;
1194         char *x;
1195
1196         assert(c);
1197         assert(ret);
1198
1199         our_env = new0(char*, 10);
1200         if (!our_env)
1201                 return -ENOMEM;
1202
1203         if (n_fds > 0) {
1204                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1205                         return -ENOMEM;
1206                 our_env[n_env++] = x;
1207
1208                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1209                         return -ENOMEM;
1210                 our_env[n_env++] = x;
1211         }
1212
1213         if (watchdog_usec > 0) {
1214                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217
1218                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1219                         return -ENOMEM;
1220                 our_env[n_env++] = x;
1221         }
1222
1223         if (home) {
1224                 x = strappend("HOME=", home);
1225                 if (!x)
1226                         return -ENOMEM;
1227                 our_env[n_env++] = x;
1228         }
1229
1230         if (username) {
1231                 x = strappend("LOGNAME=", username);
1232                 if (!x)
1233                         return -ENOMEM;
1234                 our_env[n_env++] = x;
1235
1236                 x = strappend("USER=", username);
1237                 if (!x)
1238                         return -ENOMEM;
1239                 our_env[n_env++] = x;
1240         }
1241
1242         if (shell) {
1243                 x = strappend("SHELL=", shell);
1244                 if (!x)
1245                         return -ENOMEM;
1246                 our_env[n_env++] = x;
1247         }
1248
1249         if (is_terminal_input(c->std_input) ||
1250             c->std_output == EXEC_OUTPUT_TTY ||
1251             c->std_error == EXEC_OUTPUT_TTY ||
1252             c->tty_path) {
1253
1254                 x = strdup(default_term_for_tty(tty_path(c)));
1255                 if (!x)
1256                         return -ENOMEM;
1257                 our_env[n_env++] = x;
1258         }
1259
1260         our_env[n_env++] = NULL;
1261         assert(n_env <= 10);
1262
1263         *ret = our_env;
1264         our_env = NULL;
1265
1266         return 0;
1267 }
1268
1269 static int exec_child(ExecCommand *command,
1270                       const ExecContext *context,
1271                       const ExecParameters *params,
1272                       ExecRuntime *runtime,
1273                       char **argv,
1274                       int socket_fd,
1275                       int *fds, unsigned n_fds,
1276                       char **files_env,
1277                       int *error) {
1278
1279         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1280         _cleanup_free_ char *mac_selinux_context_net = NULL;
1281         const char *username = NULL, *home = NULL, *shell = NULL;
1282         unsigned n_dont_close = 0;
1283         int dont_close[n_fds + 4];
1284         uid_t uid = UID_INVALID;
1285         gid_t gid = GID_INVALID;
1286         int i, err;
1287
1288         assert(command);
1289         assert(context);
1290         assert(params);
1291         assert(error);
1292
1293         rename_process_from_path(command->path);
1294
1295         /* We reset exactly these signals, since they are the
1296          * only ones we set to SIG_IGN in the main daemon. All
1297          * others we leave untouched because we set them to
1298          * SIG_DFL or a valid handler initially, both of which
1299          * will be demoted to SIG_DFL. */
1300         default_signals(SIGNALS_CRASH_HANDLER,
1301                         SIGNALS_IGNORE, -1);
1302
1303         if (context->ignore_sigpipe)
1304                 ignore_signals(SIGPIPE, -1);
1305
1306         err = reset_signal_mask();
1307         if (err < 0) {
1308                 *error = EXIT_SIGNAL_MASK;
1309                 return err;
1310         }
1311
1312         if (params->idle_pipe)
1313                 do_idle_pipe_dance(params->idle_pipe);
1314
1315         /* Close sockets very early to make sure we don't
1316          * block init reexecution because it cannot bind its
1317          * sockets */
1318         log_forget_fds();
1319
1320         if (socket_fd >= 0)
1321                 dont_close[n_dont_close++] = socket_fd;
1322         if (n_fds > 0) {
1323                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324                 n_dont_close += n_fds;
1325         }
1326         if (params->bus_endpoint_fd >= 0)
1327                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1328         if (runtime) {
1329                 if (runtime->netns_storage_socket[0] >= 0)
1330                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331                 if (runtime->netns_storage_socket[1] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1333         }
1334
1335         err = close_all_fds(dont_close, n_dont_close);
1336         if (err < 0) {
1337                 *error = EXIT_FDS;
1338                 return err;
1339         }
1340
1341         if (!context->same_pgrp)
1342                 if (setsid() < 0) {
1343                         *error = EXIT_SETSID;
1344                         return -errno;
1345                 }
1346
1347         exec_context_tty_reset(context);
1348
1349         if (params->confirm_spawn) {
1350                 char response;
1351
1352                 err = ask_for_confirmation(&response, argv);
1353                 if (err == -ETIMEDOUT)
1354                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1355                 else if (err < 0)
1356                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1357                 else if (response == 's') {
1358                         write_confirm_message("Skipping execution.\n");
1359                         *error = EXIT_CONFIRM;
1360                         return -ECANCELED;
1361                 } else if (response == 'n') {
1362                         write_confirm_message("Failing execution.\n");
1363                         *error = 0;
1364                         return 0;
1365                 }
1366         }
1367
1368         if (context->user) {
1369                 username = context->user;
1370                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                 if (err < 0) {
1372                         *error = EXIT_USER;
1373                         return err;
1374                 }
1375         }
1376
        /* If a socket is connected to STDIN/STDOUT/STDERR, we
         * must make sure to drop O_NONBLOCK */
1379         if (socket_fd >= 0)
1380                 fd_nonblock(socket_fd, false);
1381
1382         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1383         if (err < 0) {
1384                 *error = EXIT_STDIN;
1385                 return err;
1386         }
1387
1388         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (err < 0) {
1390                 *error = EXIT_STDOUT;
1391                 return err;
1392         }
1393
1394         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1395         if (err < 0) {
1396                 *error = EXIT_STDERR;
1397                 return err;
1398         }
1399
1400         if (params->cgroup_path) {
1401                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1402                 if (err < 0) {
1403                         *error = EXIT_CGROUP;
1404                         return err;
1405                 }
1406         }
1407
1408         if (context->oom_score_adjust_set) {
1409                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1410
1411                 /* When we can't make this change due to EPERM, then
1412                  * let's silently skip over it. User namespaces
1413                  * prohibit write access to this file, and we
1414                  * shouldn't trip up over that. */
1415
1416                 sprintf(t, "%i", context->oom_score_adjust);
1417                 err = write_string_file("/proc/self/oom_score_adj", t);
1418                 if (err < 0 && err != -EPERM && err != EACCES) {
1419                         *error = EXIT_OOM_ADJUST;
1420                         return -errno;
1421                 }
1422         }
1423
1424         if (context->nice_set)
1425                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1426                         *error = EXIT_NICE;
1427                         return -errno;
1428                 }
1429
1430         if (context->cpu_sched_set) {
1431                 struct sched_param param = {
1432                         .sched_priority = context->cpu_sched_priority,
1433                 };
1434
1435                 err = sched_setscheduler(0,
1436                                          context->cpu_sched_policy |
1437                                          (context->cpu_sched_reset_on_fork ?
1438                                           SCHED_RESET_ON_FORK : 0),
1439                                          &param);
1440                 if (err < 0) {
1441                         *error = EXIT_SETSCHEDULER;
1442                         return -errno;
1443                 }
1444         }
1445
1446         if (context->cpuset)
1447                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1448                         *error = EXIT_CPUAFFINITY;
1449                         return -errno;
1450                 }
1451
1452         if (context->ioprio_set)
1453                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1454                         *error = EXIT_IOPRIO;
1455                         return -errno;
1456                 }
1457
1458         if (context->timer_slack_nsec != NSEC_INFINITY)
1459                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1460                         *error = EXIT_TIMERSLACK;
1461                         return -errno;
1462                 }
1463
1464         if (context->personality != 0xffffffffUL)
1465                 if (personality(context->personality) < 0) {
1466                         *error = EXIT_PERSONALITY;
1467                         return -errno;
1468                 }
1469
1470         if (context->utmp_id)
1471                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1472
1473         if (context->user && is_terminal_input(context->std_input)) {
1474                 err = chown_terminal(STDIN_FILENO, uid);
1475                 if (err < 0) {
1476                         *error = EXIT_STDIN;
1477                         return err;
1478                 }
1479         }
1480
1481 #ifdef ENABLE_KDBUS
1482         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1483                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1484
1485                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1486                 if (err < 0) {
1487                         *error = EXIT_BUS_ENDPOINT;
1488                         return err;
1489                 }
1490         }
1491 #endif
1492
1493         /* If delegation is enabled we'll pass ownership of the cgroup
1494          * (but only in systemd's own controller hierarchy!) to the
1495          * user of the new process. */
1496         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1497                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1498                 if (err < 0) {
1499                         *error = EXIT_CGROUP;
1500                         return err;
1501                 }
1502
1503
1504                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1505                 if (err < 0) {
1506                         *error = EXIT_CGROUP;
1507                         return err;
1508                 }
1509         }
1510
1511         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1512                 char **rt;
1513
1514                 STRV_FOREACH(rt, context->runtime_directory) {
1515                         _cleanup_free_ char *p;
1516
1517                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1518                         if (!p) {
1519                                 *error = EXIT_RUNTIME_DIRECTORY;
1520                                 return -ENOMEM;
1521                         }
1522
1523                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1524                         if (err < 0) {
1525                                 *error = EXIT_RUNTIME_DIRECTORY;
1526                                 return err;
1527                         }
1528                 }
1529         }
1530
1531         if (params->apply_permissions) {
1532                 err = enforce_groups(context, username, gid);
1533                 if (err < 0) {
1534                         *error = EXIT_GROUP;
1535                         return err;
1536                 }
1537         }
1538
1539         umask(context->umask);
1540
1541 #ifdef HAVE_PAM
1542         if (params->apply_permissions && context->pam_name && username) {
1543                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1544                 if (err < 0) {
1545                         *error = EXIT_PAM;
1546                         return err;
1547                 }
1548         }
1549 #endif
1550
1551         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1552                 err = setup_netns(runtime->netns_storage_socket);
1553                 if (err < 0) {
1554                         *error = EXIT_NETWORK;
1555                         return err;
1556                 }
1557         }
1558
1559         if (!strv_isempty(context->read_write_dirs) ||
1560             !strv_isempty(context->read_only_dirs) ||
1561             !strv_isempty(context->inaccessible_dirs) ||
1562             context->mount_flags != 0 ||
1563             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1564             params->bus_endpoint_path ||
1565             context->private_devices ||
1566             context->protect_system != PROTECT_SYSTEM_NO ||
1567             context->protect_home != PROTECT_HOME_NO) {
1568
1569                 char *tmp = NULL, *var = NULL;
1570
1571                 /* The runtime struct only contains the parent
1572                  * of the private /tmp, which is
1573                  * non-accessible to world users. Inside of it
1574                  * there's a /tmp that is sticky, and that's
1575                  * the one we want to use here. */
1576
1577                 if (context->private_tmp && runtime) {
1578                         if (runtime->tmp_dir)
1579                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1580                         if (runtime->var_tmp_dir)
1581                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1582                 }
1583
1584                 err = setup_namespace(
1585                                 context->read_write_dirs,
1586                                 context->read_only_dirs,
1587                                 context->inaccessible_dirs,
1588                                 tmp,
1589                                 var,
1590                                 params->bus_endpoint_path,
1591                                 context->private_devices,
1592                                 context->protect_home,
1593                                 context->protect_system,
1594                                 context->mount_flags);
1595
1596                 if (err == -EPERM)
1597                         log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1598                 else if (err < 0) {
1599                         *error = EXIT_NAMESPACE;
1600                         return err;
1601                 }
1602         }
1603
1604         if (params->apply_chroot) {
1605                 if (context->root_directory)
1606                         if (chroot(context->root_directory) < 0) {
1607                                 *error = EXIT_CHROOT;
1608                                 return -errno;
1609                         }
1610
1611                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1612                         *error = EXIT_CHDIR;
1613                         return -errno;
1614                 }
1615         } else {
1616                 _cleanup_free_ char *d = NULL;
1617
1618                 if (asprintf(&d, "%s/%s",
1619                              context->root_directory ? context->root_directory : "",
1620                              context->working_directory ? context->working_directory : "") < 0) {
1621                         *error = EXIT_MEMORY;
1622                         return -ENOMEM;
1623                 }
1624
1625                 if (chdir(d) < 0) {
1626                         *error = EXIT_CHDIR;
1627                         return -errno;
1628                 }
1629         }
1630
1631 #ifdef HAVE_SELINUX
1632         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1633                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1634                 if (err < 0) {
1635                         *error = EXIT_SELINUX_CONTEXT;
1636                         return err;
1637                 }
1638         }
1639 #endif
1640
1641         /* We repeat the fd closing here, to make sure that
1642          * nothing is leaked from the PAM modules. Note that
1643          * we are more aggressive this time since socket_fd
1644          * and the netns fds we don't need anymore. The custom
1645          * endpoint fd was needed to upload the policy and can
1646          * now be closed as well. */
1647         err = close_all_fds(fds, n_fds);
1648         if (err >= 0)
1649                 err = shift_fds(fds, n_fds);
1650         if (err >= 0)
1651                 err = flags_fds(fds, n_fds, context->non_blocking);
1652         if (err < 0) {
1653                 *error = EXIT_FDS;
1654                 return err;
1655         }
1656
1657         if (params->apply_permissions) {
1658
1659                 for (i = 0; i < _RLIMIT_MAX; i++) {
1660                         if (!context->rlimit[i])
1661                                 continue;
1662
1663                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1664                                 *error = EXIT_LIMITS;
1665                                 return -errno;
1666                         }
1667                 }
1668
1669                 if (context->capability_bounding_set_drop) {
1670                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1671                         if (err < 0) {
1672                                 *error = EXIT_CAPABILITIES;
1673                                 return err;
1674                         }
1675                 }
1676
1677 #ifdef HAVE_SMACK
1678                 if (context->smack_process_label) {
1679                         err = mac_smack_apply_pid(0, context->smack_process_label);
1680                         if (err < 0) {
1681                                 *error = EXIT_SMACK_PROCESS_LABEL;
1682                                 return err;
1683                         }
1684                 }
1685 #endif
1686
1687                 if (context->user) {
1688                         err = enforce_user(context, uid);
1689                         if (err < 0) {
1690                                 *error = EXIT_USER;
1691                                 return err;
1692                         }
1693                 }
1694
1695                 /* PR_GET_SECUREBITS is not privileged, while
1696                  * PR_SET_SECUREBITS is. So to suppress
1697                  * potential EPERMs we'll try not to call
1698                  * PR_SET_SECUREBITS unless necessary. */
1699                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1700                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1701                                 *error = EXIT_SECUREBITS;
1702                                 return -errno;
1703                         }
1704
1705                 if (context->capabilities)
1706                         if (cap_set_proc(context->capabilities) < 0) {
1707                                 *error = EXIT_CAPABILITIES;
1708                                 return -errno;
1709                         }
1710
1711                 if (context->no_new_privileges)
1712                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1713                                 *error = EXIT_NO_NEW_PRIVILEGES;
1714                                 return -errno;
1715                         }
1716
1717 #ifdef HAVE_SECCOMP
1718                 if (context->address_families_whitelist ||
1719                     !set_isempty(context->address_families)) {
1720                         err = apply_address_families(context);
1721                         if (err < 0) {
1722                                 *error = EXIT_ADDRESS_FAMILIES;
1723                                 return err;
1724                         }
1725                 }
1726
1727                 if (context->syscall_whitelist ||
1728                     !set_isempty(context->syscall_filter) ||
1729                     !set_isempty(context->syscall_archs)) {
1730                         err = apply_seccomp(context);
1731                         if (err < 0) {
1732                                 *error = EXIT_SECCOMP;
1733                                 return err;
1734                         }
1735                 }
1736 #endif
1737
1738 #ifdef HAVE_SELINUX
1739                 if (mac_selinux_use()) {
1740                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1741
1742                         if (exec_context) {
1743                                 err = setexeccon(exec_context);
1744                                 if (err < 0) {
1745                                         *error = EXIT_SELINUX_CONTEXT;
1746                                         return err;
1747                                 }
1748                         }
1749                 }
1750 #endif
1751
1752 #ifdef HAVE_APPARMOR
1753                 if (context->apparmor_profile && mac_apparmor_use()) {
1754                         err = aa_change_onexec(context->apparmor_profile);
1755                         if (err < 0 && !context->apparmor_profile_ignore) {
1756                                 *error = EXIT_APPARMOR_PROFILE;
1757                                 return -errno;
1758                         }
1759                 }
1760 #endif
1761         }
1762
1763         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1764         if (err < 0) {
1765                 *error = EXIT_MEMORY;
1766                 return err;
1767         }
1768
1769         final_env = strv_env_merge(5,
1770                                    params->environment,
1771                                    our_env,
1772                                    context->environment,
1773                                    files_env,
1774                                    pam_env,
1775                                    NULL);
1776         if (!final_env) {
1777                 *error = EXIT_MEMORY;
1778                 return -ENOMEM;
1779         }
1780
1781         final_argv = replace_env_argv(argv, final_env);
1782         if (!final_argv) {
1783                 *error = EXIT_MEMORY;
1784                 return -ENOMEM;
1785         }
1786
1787         final_env = strv_env_clean(final_env);
1788
1789         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1790                 _cleanup_free_ char *line;
1791
1792                 line = exec_command_line(final_argv);
1793                 if (line) {
1794                         log_open();
1795                         log_unit_struct(params->unit_id,
1796                                         LOG_DEBUG,
1797                                         "EXECUTABLE=%s", command->path,
1798                                         LOG_MESSAGE("Executing: %s", line),
1799                                         NULL);
1800                         log_close();
1801                 }
1802         }
1803         execve(command->path, final_argv, final_env);
1804         *error = EXIT_EXEC;
1805         return -errno;
1806 }
1807
1808 int exec_spawn(ExecCommand *command,
1809                const ExecContext *context,
1810                const ExecParameters *params,
1811                ExecRuntime *runtime,
1812                pid_t *ret) {
1813
1814         _cleanup_strv_free_ char **files_env = NULL;
1815         int *fds = NULL; unsigned n_fds = 0;
1816         char *line, **argv;
1817         int socket_fd;
1818         pid_t pid;
1819         int err;
1820
1821         assert(command);
1822         assert(context);
1823         assert(ret);
1824         assert(params);
1825         assert(params->fds || params->n_fds <= 0);
1826
1827         if (context->std_input == EXEC_INPUT_SOCKET ||
1828             context->std_output == EXEC_OUTPUT_SOCKET ||
1829             context->std_error == EXEC_OUTPUT_SOCKET) {
1830
1831                 if (params->n_fds != 1)
1832                         return -EINVAL;
1833
1834                 socket_fd = params->fds[0];
1835         } else {
1836                 socket_fd = -1;
1837                 fds = params->fds;
1838                 n_fds = params->n_fds;
1839         }
1840
1841         err = exec_context_load_environment(context, params->unit_id, &files_env);
1842         if (err < 0) {
1843                 log_unit_struct(params->unit_id,
1844                                 LOG_ERR,
1845                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1846                                 LOG_ERRNO(-err),
1847                                 NULL);
1848                 return err;
1849         }
1850
1851         argv = params->argv ?: command->argv;
1852
1853         line = exec_command_line(argv);
1854         if (!line)
1855                 return log_oom();
1856
1857         log_unit_struct(params->unit_id,
1858                         LOG_DEBUG,
1859                         "EXECUTABLE=%s", command->path,
1860                         LOG_MESSAGE("About to execute: %s", line),
1861                         NULL);
1862         free(line);
1863
1864         pid = fork();
1865         if (pid < 0)
1866                 return -errno;
1867
1868         if (pid == 0) {
1869                 int r;
1870
1871                 err = exec_child(command,
1872                                  context,
1873                                  params,
1874                                  runtime,
1875                                  argv,
1876                                  socket_fd,
1877                                  fds, n_fds,
1878                                  files_env,
1879                                  &r);
1880                 if (r != 0) {
1881                         log_open();
1882                         log_struct(LOG_ERR,
1883                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1884                                    "EXECUTABLE=%s", command->path,
1885                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1886                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1887                                                command->path, strerror(-err)),
1888                                    LOG_ERRNO(-err),
1889                                    NULL);
1890                         log_close();
1891                 }
1892
1893                 _exit(r);
1894         }
1895
1896         log_unit_struct(params->unit_id,
1897                         LOG_DEBUG,
1898                         LOG_MESSAGE("Forked %s as "PID_FMT,
1899                                     command->path, pid),
1900                         NULL);
1901
1902         /* We add the new process to the cgroup both in the child (so
1903          * that we can be sure that no user code is ever executed
1904          * outside of the cgroup) and in the parent (so that we can be
1905          * sure that when we kill the cgroup the process will be
1906          * killed too). */
1907         if (params->cgroup_path)
1908                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1909
1910         exec_status_start(&command->exec_status, pid);
1911
1912         *ret = pid;
1913         return 0;
1914 }
1915
1916 void exec_context_init(ExecContext *c) {
1917         assert(c);
1918
1919         c->umask = 0022;
1920         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1921         c->cpu_sched_policy = SCHED_OTHER;
1922         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1923         c->syslog_level_prefix = true;
1924         c->ignore_sigpipe = true;
1925         c->timer_slack_nsec = NSEC_INFINITY;
1926         c->personality = 0xffffffffUL;
1927         c->runtime_directory_mode = 0755;
1928 }
1929
1930 void exec_context_done(ExecContext *c) {
1931         unsigned l;
1932
1933         assert(c);
1934
1935         strv_free(c->environment);
1936         c->environment = NULL;
1937
1938         strv_free(c->environment_files);
1939         c->environment_files = NULL;
1940
1941         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1942                 free(c->rlimit[l]);
1943                 c->rlimit[l] = NULL;
1944         }
1945
1946         free(c->working_directory);
1947         c->working_directory = NULL;
1948         free(c->root_directory);
1949         c->root_directory = NULL;
1950
1951         free(c->tty_path);
1952         c->tty_path = NULL;
1953
1954         free(c->syslog_identifier);
1955         c->syslog_identifier = NULL;
1956
1957         free(c->user);
1958         c->user = NULL;
1959
1960         free(c->group);
1961         c->group = NULL;
1962
1963         strv_free(c->supplementary_groups);
1964         c->supplementary_groups = NULL;
1965
1966         free(c->pam_name);
1967         c->pam_name = NULL;
1968
1969         if (c->capabilities) {
1970                 cap_free(c->capabilities);
1971                 c->capabilities = NULL;
1972         }
1973
1974         strv_free(c->read_only_dirs);
1975         c->read_only_dirs = NULL;
1976
1977         strv_free(c->read_write_dirs);
1978         c->read_write_dirs = NULL;
1979
1980         strv_free(c->inaccessible_dirs);
1981         c->inaccessible_dirs = NULL;
1982
1983         if (c->cpuset)
1984                 CPU_FREE(c->cpuset);
1985
1986         free(c->utmp_id);
1987         c->utmp_id = NULL;
1988
1989         free(c->selinux_context);
1990         c->selinux_context = NULL;
1991
1992         free(c->apparmor_profile);
1993         c->apparmor_profile = NULL;
1994
1995         set_free(c->syscall_filter);
1996         c->syscall_filter = NULL;
1997
1998         set_free(c->syscall_archs);
1999         c->syscall_archs = NULL;
2000
2001         set_free(c->address_families);
2002         c->address_families = NULL;
2003
2004         strv_free(c->runtime_directory);
2005         c->runtime_directory = NULL;
2006
2007         bus_endpoint_free(c->bus_endpoint);
2008         c->bus_endpoint = NULL;
2009 }
2010
2011 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2012         char **i;
2013
2014         assert(c);
2015
2016         if (!runtime_prefix)
2017                 return 0;
2018
2019         STRV_FOREACH(i, c->runtime_directory) {
2020                 _cleanup_free_ char *p;
2021
2022                 p = strjoin(runtime_prefix, "/", *i, NULL);
2023                 if (!p)
2024                         return -ENOMEM;
2025
2026                 /* We execute this synchronously, since we need to be
2027                  * sure this is gone when we start the service
2028                  * next. */
2029                 rm_rf(p, false, true, false);
2030         }
2031
2032         return 0;
2033 }
2034
2035 void exec_command_done(ExecCommand *c) {
2036         assert(c);
2037
2038         free(c->path);
2039         c->path = NULL;
2040
2041         strv_free(c->argv);
2042         c->argv = NULL;
2043 }
2044
2045 void exec_command_done_array(ExecCommand *c, unsigned n) {
2046         unsigned i;
2047
2048         for (i = 0; i < n; i++)
2049                 exec_command_done(c+i);
2050 }
2051
2052 ExecCommand* exec_command_free_list(ExecCommand *c) {
2053         ExecCommand *i;
2054
2055         while ((i = c)) {
2056                 LIST_REMOVE(command, c, i);
2057                 exec_command_done(i);
2058                 free(i);
2059         }
2060
2061         return NULL;
2062 }
2063
2064 void exec_command_free_array(ExecCommand **c, unsigned n) {
2065         unsigned i;
2066
2067         for (i = 0; i < n; i++)
2068                 c[i] = exec_command_free_list(c[i]);
2069 }
2070
/* Userdata handed to invalid_env() when environment files are
 * cleaned up. */
typedef struct InvalidEnvInfo {
        /* Unit the log message is attributed to */
        const char *unit_id;
        /* File the invalid assignment was read from */
        const char *path;
} InvalidEnvInfo;
2075
2076 static void invalid_env(const char *p, void *userdata) {
2077         InvalidEnvInfo *info = userdata;
2078
2079         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2080 }
2081
2082 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2083         char **i, **r = NULL;
2084
2085         assert(c);
2086         assert(l);
2087
2088         STRV_FOREACH(i, c->environment_files) {
2089                 char *fn;
2090                 int k;
2091                 bool ignore = false;
2092                 char **p;
2093                 _cleanup_globfree_ glob_t pglob = {};
2094                 int count, n;
2095
2096                 fn = *i;
2097
2098                 if (fn[0] == '-') {
2099                         ignore = true;
2100                         fn ++;
2101                 }
2102
2103                 if (!path_is_absolute(fn)) {
2104                         if (ignore)
2105                                 continue;
2106
2107                         strv_free(r);
2108                         return -EINVAL;
2109                 }
2110
2111                 /* Filename supports globbing, take all matching files */
2112                 errno = 0;
2113                 if (glob(fn, 0, NULL, &pglob) != 0) {
2114                         if (ignore)
2115                                 continue;
2116
2117                         strv_free(r);
2118                         return errno ? -errno : -EINVAL;
2119                 }
2120                 count = pglob.gl_pathc;
2121                 if (count == 0) {
2122                         if (ignore)
2123                                 continue;
2124
2125                         strv_free(r);
2126                         return -EINVAL;
2127                 }
2128                 for (n = 0; n < count; n++) {
2129                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2130                         if (k < 0) {
2131                                 if (ignore)
2132                                         continue;
2133
2134                                 strv_free(r);
2135                                 return k;
2136                         }
2137                         /* Log invalid environment variables with filename */
2138                         if (p) {
2139                                 InvalidEnvInfo info = {
2140                                         .unit_id = unit_id,
2141                                         .path = pglob.gl_pathv[n]
2142                                 };
2143
2144                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2145                         }
2146
2147                         if (r == NULL)
2148                                 r = p;
2149                         else {
2150                                 char **m;
2151
2152                                 m = strv_env_merge(2, r, p);
2153                                 strv_free(r);
2154                                 strv_free(p);
2155                                 if (!m)
2156                                         return -ENOMEM;
2157
2158                                 r = m;
2159                         }
2160                 }
2161         }
2162
2163         *l = r;
2164
2165         return 0;
2166 }
2167
2168 static bool tty_may_match_dev_console(const char *tty) {
2169         _cleanup_free_ char *active = NULL;
2170        char *console;
2171
2172         if (startswith(tty, "/dev/"))
2173                 tty += 5;
2174
2175         /* trivial identity? */
2176         if (streq(tty, "console"))
2177                 return true;
2178
2179         console = resolve_dev_console(&active);
2180         /* if we could not resolve, assume it may */
2181         if (!console)
2182                 return true;
2183
2184         /* "tty0" means the active VC, so it may be the same sometimes */
2185         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2186 }
2187
2188 bool exec_context_may_touch_console(ExecContext *ec) {
2189         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2190                 is_terminal_input(ec->std_input) ||
2191                 is_terminal_output(ec->std_output) ||
2192                 is_terminal_output(ec->std_error)) &&
2193                tty_may_match_dev_console(tty_path(ec));
2194 }
2195
2196 static void strv_fprintf(FILE *f, char **l) {
2197         char **g;
2198
2199         assert(f);
2200
2201         STRV_FOREACH(g, l)
2202                 fprintf(f, " %s", *g);
2203 }
2204
2205 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2206         char **e;
2207         unsigned i;
2208
2209         assert(c);
2210         assert(f);
2211
2212         prefix = strempty(prefix);
2213
2214         fprintf(f,
2215                 "%sUMask: %04o\n"
2216                 "%sWorkingDirectory: %s\n"
2217                 "%sRootDirectory: %s\n"
2218                 "%sNonBlocking: %s\n"
2219                 "%sPrivateTmp: %s\n"
2220                 "%sPrivateNetwork: %s\n"
2221                 "%sPrivateDevices: %s\n"
2222                 "%sProtectHome: %s\n"
2223                 "%sProtectSystem: %s\n"
2224                 "%sIgnoreSIGPIPE: %s\n",
2225                 prefix, c->umask,
2226                 prefix, c->working_directory ? c->working_directory : "/",
2227                 prefix, c->root_directory ? c->root_directory : "/",
2228                 prefix, yes_no(c->non_blocking),
2229                 prefix, yes_no(c->private_tmp),
2230                 prefix, yes_no(c->private_network),
2231                 prefix, yes_no(c->private_devices),
2232                 prefix, protect_home_to_string(c->protect_home),
2233                 prefix, protect_system_to_string(c->protect_system),
2234                 prefix, yes_no(c->ignore_sigpipe));
2235
2236         STRV_FOREACH(e, c->environment)
2237                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2238
2239         STRV_FOREACH(e, c->environment_files)
2240                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2241
2242         if (c->nice_set)
2243                 fprintf(f,
2244                         "%sNice: %i\n",
2245                         prefix, c->nice);
2246
2247         if (c->oom_score_adjust_set)
2248                 fprintf(f,
2249                         "%sOOMScoreAdjust: %i\n",
2250                         prefix, c->oom_score_adjust);
2251
2252         for (i = 0; i < RLIM_NLIMITS; i++)
2253                 if (c->rlimit[i])
2254                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2255                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2256
2257         if (c->ioprio_set) {
2258                 _cleanup_free_ char *class_str = NULL;
2259
2260                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2261                 fprintf(f,
2262                         "%sIOSchedulingClass: %s\n"
2263                         "%sIOPriority: %i\n",
2264                         prefix, strna(class_str),
2265                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2266         }
2267
2268         if (c->cpu_sched_set) {
2269                 _cleanup_free_ char *policy_str = NULL;
2270
2271                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2272                 fprintf(f,
2273                         "%sCPUSchedulingPolicy: %s\n"
2274                         "%sCPUSchedulingPriority: %i\n"
2275                         "%sCPUSchedulingResetOnFork: %s\n",
2276                         prefix, strna(policy_str),
2277                         prefix, c->cpu_sched_priority,
2278                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2279         }
2280
2281         if (c->cpuset) {
2282                 fprintf(f, "%sCPUAffinity:", prefix);
2283                 for (i = 0; i < c->cpuset_ncpus; i++)
2284                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2285                                 fprintf(f, " %u", i);
2286                 fputs("\n", f);
2287         }
2288
2289         if (c->timer_slack_nsec != NSEC_INFINITY)
2290                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2291
2292         fprintf(f,
2293                 "%sStandardInput: %s\n"
2294                 "%sStandardOutput: %s\n"
2295                 "%sStandardError: %s\n",
2296                 prefix, exec_input_to_string(c->std_input),
2297                 prefix, exec_output_to_string(c->std_output),
2298                 prefix, exec_output_to_string(c->std_error));
2299
2300         if (c->tty_path)
2301                 fprintf(f,
2302                         "%sTTYPath: %s\n"
2303                         "%sTTYReset: %s\n"
2304                         "%sTTYVHangup: %s\n"
2305                         "%sTTYVTDisallocate: %s\n",
2306                         prefix, c->tty_path,
2307                         prefix, yes_no(c->tty_reset),
2308                         prefix, yes_no(c->tty_vhangup),
2309                         prefix, yes_no(c->tty_vt_disallocate));
2310
2311         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2312             c->std_output == EXEC_OUTPUT_KMSG ||
2313             c->std_output == EXEC_OUTPUT_JOURNAL ||
2314             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2317             c->std_error == EXEC_OUTPUT_SYSLOG ||
2318             c->std_error == EXEC_OUTPUT_KMSG ||
2319             c->std_error == EXEC_OUTPUT_JOURNAL ||
2320             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2321             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2322             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2323
2324                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2325
2326                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2327                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2328
2329                 fprintf(f,
2330                         "%sSyslogFacility: %s\n"
2331                         "%sSyslogLevel: %s\n",
2332                         prefix, strna(fac_str),
2333                         prefix, strna(lvl_str));
2334         }
2335
2336         if (c->capabilities) {
2337                 _cleanup_cap_free_charp_ char *t;
2338
2339                 t = cap_to_text(c->capabilities, NULL);
2340                 if (t)
2341                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2342         }
2343
2344         if (c->secure_bits)
2345                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2346                         prefix,
2347                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2348                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2349                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2350                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2351                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2352                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2353
2354         if (c->capability_bounding_set_drop) {
2355                 unsigned long l;
2356                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2357
2358                 for (l = 0; l <= cap_last_cap(); l++)
2359                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2360                                 fprintf(f, " %s", strna(capability_to_name(l)));
2361
2362                 fputs("\n", f);
2363         }
2364
2365         if (c->user)
2366                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2367         if (c->group)
2368                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2369
2370         if (strv_length(c->supplementary_groups) > 0) {
2371                 fprintf(f, "%sSupplementaryGroups:", prefix);
2372                 strv_fprintf(f, c->supplementary_groups);
2373                 fputs("\n", f);
2374         }
2375
2376         if (c->pam_name)
2377                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2378
2379         if (strv_length(c->read_write_dirs) > 0) {
2380                 fprintf(f, "%sReadWriteDirs:", prefix);
2381                 strv_fprintf(f, c->read_write_dirs);
2382                 fputs("\n", f);
2383         }
2384
2385         if (strv_length(c->read_only_dirs) > 0) {
2386                 fprintf(f, "%sReadOnlyDirs:", prefix);
2387                 strv_fprintf(f, c->read_only_dirs);
2388                 fputs("\n", f);
2389         }
2390
2391         if (strv_length(c->inaccessible_dirs) > 0) {
2392                 fprintf(f, "%sInaccessibleDirs:", prefix);
2393                 strv_fprintf(f, c->inaccessible_dirs);
2394                 fputs("\n", f);
2395         }
2396
2397         if (c->utmp_id)
2398                 fprintf(f,
2399                         "%sUtmpIdentifier: %s\n",
2400                         prefix, c->utmp_id);
2401
2402         if (c->selinux_context)
2403                 fprintf(f,
2404                         "%sSELinuxContext: %s%s\n",
2405                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2406
2407         if (c->personality != 0xffffffffUL)
2408                 fprintf(f,
2409                         "%sPersonality: %s\n",
2410                         prefix, strna(personality_to_string(c->personality)));
2411
2412         if (c->syscall_filter) {
2413 #ifdef HAVE_SECCOMP
2414                 Iterator j;
2415                 void *id;
2416                 bool first = true;
2417 #endif
2418
2419                 fprintf(f,
2420                         "%sSystemCallFilter: ",
2421                         prefix);
2422
2423                 if (!c->syscall_whitelist)
2424                         fputc('~', f);
2425
2426 #ifdef HAVE_SECCOMP
2427                 SET_FOREACH(id, c->syscall_filter, j) {
2428                         _cleanup_free_ char *name = NULL;
2429
2430                         if (first)
2431                                 first = false;
2432                         else
2433                                 fputc(' ', f);
2434
2435                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2436                         fputs(strna(name), f);
2437                 }
2438 #endif
2439
2440                 fputc('\n', f);
2441         }
2442
2443         if (c->syscall_archs) {
2444 #ifdef HAVE_SECCOMP
2445                 Iterator j;
2446                 void *id;
2447 #endif
2448
2449                 fprintf(f,
2450                         "%sSystemCallArchitectures:",
2451                         prefix);
2452
2453 #ifdef HAVE_SECCOMP
2454                 SET_FOREACH(id, c->syscall_archs, j)
2455                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2456 #endif
2457                 fputc('\n', f);
2458         }
2459
2460         if (c->syscall_errno != 0)
2461                 fprintf(f,
2462                         "%sSystemCallErrorNumber: %s\n",
2463                         prefix, strna(errno_to_name(c->syscall_errno)));
2464
2465         if (c->apparmor_profile)
2466                 fprintf(f,
2467                         "%sAppArmorProfile: %s%s\n",
2468                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2469 }
2470
2471 bool exec_context_maintains_privileges(ExecContext *c) {
2472         assert(c);
2473
2474         /* Returns true if the process forked off would run run under
2475          * an unchanged UID or as root. */
2476
2477         if (!c->user)
2478                 return true;
2479
2480         if (streq(c->user, "root") || streq(c->user, "0"))
2481                 return true;
2482
2483         return false;
2484 }
2485
2486 void exec_status_start(ExecStatus *s, pid_t pid) {
2487         assert(s);
2488
2489         zero(*s);
2490         s->pid = pid;
2491         dual_timestamp_get(&s->start_timestamp);
2492 }
2493
2494 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2495         assert(s);
2496
2497         if (s->pid && s->pid != pid)
2498                 zero(*s);
2499
2500         s->pid = pid;
2501         dual_timestamp_get(&s->exit_timestamp);
2502
2503         s->code = code;
2504         s->status = status;
2505
2506         if (context) {
2507                 if (context->utmp_id)
2508                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2509
2510                 exec_context_tty_reset(context);
2511         }
2512 }
2513
2514 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2515         char buf[FORMAT_TIMESTAMP_MAX];
2516
2517         assert(s);
2518         assert(f);
2519
2520         if (s->pid <= 0)
2521                 return;
2522
2523         prefix = strempty(prefix);
2524
2525         fprintf(f,
2526                 "%sPID: "PID_FMT"\n",
2527                 prefix, s->pid);
2528
2529         if (s->start_timestamp.realtime > 0)
2530                 fprintf(f,
2531                         "%sStart Timestamp: %s\n",
2532                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2533
2534         if (s->exit_timestamp.realtime > 0)
2535                 fprintf(f,
2536                         "%sExit Timestamp: %s\n"
2537                         "%sExit Code: %s\n"
2538                         "%sExit Status: %i\n",
2539                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2540                         prefix, sigchld_code_to_string(s->code),
2541                         prefix, s->status);
2542 }
2543
2544 char *exec_command_line(char **argv) {
2545         size_t k;
2546         char *n, *p, **a;
2547         bool first = true;
2548
2549         assert(argv);
2550
2551         k = 1;
2552         STRV_FOREACH(a, argv)
2553                 k += strlen(*a)+3;
2554
2555         if (!(n = new(char, k)))
2556                 return NULL;
2557
2558         p = n;
2559         STRV_FOREACH(a, argv) {
2560
2561                 if (!first)
2562                         *(p++) = ' ';
2563                 else
2564                         first = false;
2565
2566                 if (strpbrk(*a, WHITESPACE)) {
2567                         *(p++) = '\'';
2568                         p = stpcpy(p, *a);
2569                         *(p++) = '\'';
2570                 } else
2571                         p = stpcpy(p, *a);
2572
2573         }
2574
2575         *p = 0;
2576
2577         /* FIXME: this doesn't really handle arguments that have
2578          * spaces and ticks in them */
2579
2580         return n;
2581 }
2582
2583 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2584         _cleanup_free_ char *cmd = NULL;
2585         const char *prefix2;
2586
2587         assert(c);
2588         assert(f);
2589
2590         prefix = strempty(prefix);
2591         prefix2 = strappenda(prefix, "\t");
2592
2593         cmd = exec_command_line(c->argv);
2594         fprintf(f,
2595                 "%sCommand Line: %s\n",
2596                 prefix, cmd ? cmd : strerror(ENOMEM));
2597
2598         exec_status_dump(&c->exec_status, f, prefix2);
2599 }
2600
2601 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2602         assert(f);
2603
2604         prefix = strempty(prefix);
2605
2606         LIST_FOREACH(command, c, c)
2607                 exec_command_dump(c, f, prefix);
2608 }
2609
2610 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2611         ExecCommand *end;
2612
2613         assert(l);
2614         assert(e);
2615
2616         if (*l) {
2617                 /* It's kind of important, that we keep the order here */
2618                 LIST_FIND_TAIL(command, *l, end);
2619                 LIST_INSERT_AFTER(command, *l, end, e);
2620         } else
2621               *l = e;
2622 }
2623
2624 int exec_command_set(ExecCommand *c, const char *path, ...) {
2625         va_list ap;
2626         char **l, *p;
2627
2628         assert(c);
2629         assert(path);
2630
2631         va_start(ap, path);
2632         l = strv_new_ap(path, ap);
2633         va_end(ap);
2634
2635         if (!l)
2636                 return -ENOMEM;
2637
2638         p = strdup(path);
2639         if (!p) {
2640                 strv_free(l);
2641                 return -ENOMEM;
2642         }
2643
2644         free(c->path);
2645         c->path = p;
2646
2647         strv_free(c->argv);
2648         c->argv = l;
2649
2650         return 0;
2651 }
2652
2653 int exec_command_append(ExecCommand *c, const char *path, ...) {
2654         _cleanup_strv_free_ char **l = NULL;
2655         va_list ap;
2656         int r;
2657
2658         assert(c);
2659         assert(path);
2660
2661         va_start(ap, path);
2662         l = strv_new_ap(path, ap);
2663         va_end(ap);
2664
2665         if (!l)
2666                 return -ENOMEM;
2667
2668         r = strv_extend_strv(&c->argv, l);
2669         if (r < 0)
2670                 return r;
2671
2672         return 0;
2673 }
2674
2675
2676 static int exec_runtime_allocate(ExecRuntime **rt) {
2677
2678         if (*rt)
2679                 return 0;
2680
2681         *rt = new0(ExecRuntime, 1);
2682         if (!*rt)
2683                 return -ENOMEM;
2684
2685         (*rt)->n_ref = 1;
2686         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2687
2688         return 0;
2689 }
2690
2691 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2692         int r;
2693
2694         assert(rt);
2695         assert(c);
2696         assert(id);
2697
2698         if (*rt)
2699                 return 1;
2700
2701         if (!c->private_network && !c->private_tmp)
2702                 return 0;
2703
2704         r = exec_runtime_allocate(rt);
2705         if (r < 0)
2706                 return r;
2707
2708         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2709                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2710                         return -errno;
2711         }
2712
2713         if (c->private_tmp && !(*rt)->tmp_dir) {
2714                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2715                 if (r < 0)
2716                         return r;
2717         }
2718
2719         return 1;
2720 }
2721
2722 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2723         assert(r);
2724         assert(r->n_ref > 0);
2725
2726         r->n_ref++;
2727         return r;
2728 }
2729
2730 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2731
2732         if (!r)
2733                 return NULL;
2734
2735         assert(r->n_ref > 0);
2736
2737         r->n_ref--;
2738         if (r->n_ref <= 0) {
2739                 free(r->tmp_dir);
2740                 free(r->var_tmp_dir);
2741                 safe_close_pair(r->netns_storage_socket);
2742                 free(r);
2743         }
2744
2745         return NULL;
2746 }
2747
2748 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2749         assert(u);
2750         assert(f);
2751         assert(fds);
2752
2753         if (!rt)
2754                 return 0;
2755
2756         if (rt->tmp_dir)
2757                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2758
2759         if (rt->var_tmp_dir)
2760                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2761
2762         if (rt->netns_storage_socket[0] >= 0) {
2763                 int copy;
2764
2765                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2766                 if (copy < 0)
2767                         return copy;
2768
2769                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2770         }
2771
2772         if (rt->netns_storage_socket[1] >= 0) {
2773                 int copy;
2774
2775                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2776                 if (copy < 0)
2777                         return copy;
2778
2779                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2780         }
2781
2782         return 0;
2783 }
2784
2785 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2786         int r;
2787
2788         assert(rt);
2789         assert(key);
2790         assert(value);
2791
2792         if (streq(key, "tmp-dir")) {
2793                 char *copy;
2794
2795                 r = exec_runtime_allocate(rt);
2796                 if (r < 0)
2797                         return r;
2798
2799                 copy = strdup(value);
2800                 if (!copy)
2801                         return log_oom();
2802
2803                 free((*rt)->tmp_dir);
2804                 (*rt)->tmp_dir = copy;
2805
2806         } else if (streq(key, "var-tmp-dir")) {
2807                 char *copy;
2808
2809                 r = exec_runtime_allocate(rt);
2810                 if (r < 0)
2811                         return r;
2812
2813                 copy = strdup(value);
2814                 if (!copy)
2815                         return log_oom();
2816
2817                 free((*rt)->var_tmp_dir);
2818                 (*rt)->var_tmp_dir = copy;
2819
2820         } else if (streq(key, "netns-socket-0")) {
2821                 int fd;
2822
2823                 r = exec_runtime_allocate(rt);
2824                 if (r < 0)
2825                         return r;
2826
2827                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2828                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2829                 else {
2830                         safe_close((*rt)->netns_storage_socket[0]);
2831                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2832                 }
2833         } else if (streq(key, "netns-socket-1")) {
2834                 int fd;
2835
2836                 r = exec_runtime_allocate(rt);
2837                 if (r < 0)
2838                         return r;
2839
2840                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2841                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2842                 else {
2843                         safe_close((*rt)->netns_storage_socket[1]);
2844                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2845                 }
2846         } else
2847                 return 0;
2848
2849         return 1;
2850 }
2851
/* Thread entry point: removes the directory tree named by p, then
 * frees the path string, which this function owns. Always returns
 * NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);

        free(path);
        return NULL;
}
2858
2859 void exec_runtime_destroy(ExecRuntime *rt) {
2860         int r;
2861
2862         if (!rt)
2863                 return;
2864
2865         /* If there are multiple users of this, let's leave the stuff around */
2866         if (rt->n_ref > 1)
2867                 return;
2868
2869         if (rt->tmp_dir) {
2870                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2871
2872                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2873                 if (r < 0) {
2874                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2875                         free(rt->tmp_dir);
2876                 }
2877
2878                 rt->tmp_dir = NULL;
2879         }
2880
2881         if (rt->var_tmp_dir) {
2882                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2883
2884                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2885                 if (r < 0) {
2886                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2887                         free(rt->var_tmp_dir);
2888                 }
2889
2890                 rt->var_tmp_dir = NULL;
2891         }
2892
2893         safe_close_pair(rt->netns_storage_socket);
2894 }
2895
2896 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2897         [EXEC_INPUT_NULL] = "null",
2898         [EXEC_INPUT_TTY] = "tty",
2899         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2900         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2901         [EXEC_INPUT_SOCKET] = "socket"
2902 };
2903
2904 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2905
2906 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2907         [EXEC_OUTPUT_INHERIT] = "inherit",
2908         [EXEC_OUTPUT_NULL] = "null",
2909         [EXEC_OUTPUT_TTY] = "tty",
2910         [EXEC_OUTPUT_SYSLOG] = "syslog",
2911         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2912         [EXEC_OUTPUT_KMSG] = "kmsg",
2913         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2914         [EXEC_OUTPUT_JOURNAL] = "journal",
2915         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2916         [EXEC_OUTPUT_SOCKET] = "socket"
2917 };
2918
2919 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);