chiark / gitweb /
027e3319ed1421244f5cd79e14fa3badd2657857
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "execute.h"
53 #include "strv.h"
54 #include "macro.h"
55 #include "capability.h"
56 #include "util.h"
57 #include "log.h"
58 #include "sd-messages.h"
59 #include "ioprio.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
63 #include "missing.h"
64 #include "def.h"
65 #include "path-util.h"
66 #include "env-util.h"
67 #include "fileio.h"
68 #include "unit.h"
69 #include "async.h"
70 #include "selinux-util.h"
71 #include "errno-list.h"
72 #include "af-list.h"
73 #include "mkdir.h"
74 #include "smack-util.h"
75 #include "bus-endpoint.h"
76 #include "cap-list.h"
77
78 #ifdef HAVE_APPARMOR
79 #include "apparmor-util.h"
80 #endif
81
82 #ifdef HAVE_SECCOMP
83 #include "seccomp-util.h"
84 #endif
85
/* Idle timeouts, in microseconds (5 s and 1 s respectively).
 * NOTE(review): their consumers are outside this part of the file. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Permission bits chown_terminal() enforces on a terminal it takes over:
 * read/write for the owner, write for the group. This assumes there is
 * a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the logger socket. */
#define SNDBUF_SIZE (8*1024*1024)
93
/* Relocates the descriptors in fds[] so that fds[i] ends up being
 * descriptor number i+3, i.e. directly after stdin/stdout/stderr.
 *
 * The array is modified in place (it ends up sorted). Returns 0 on
 * success or a negative errno value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep making passes until a pass finishes without any
         * descriptor landing on the wrong number. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = got;

                        /* The slot we asked for was still occupied, so
                         * we got a higher number. Remember the first
                         * such index and start the next pass there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
137
/* Applies the requested O_NONBLOCK state to every descriptor in fds[]
 * and clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these
                 * descriptors are meant to be passed on to our
                 * children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
164
165 _pure_ static const char *tty_path(const ExecContext *context) {
166         assert(context);
167
168         if (context->tty_path)
169                 return context->tty_path;
170
171         return "/dev/console";
172 }
173
174 static void exec_context_tty_reset(const ExecContext *context) {
175         assert(context);
176
177         if (context->tty_vhangup)
178                 terminal_vhangup(tty_path(context));
179
180         if (context->tty_reset)
181                 reset_terminal(tty_path(context));
182
183         if (context->tty_vt_disallocate && context->tty_path)
184                 vt_disallocate(context->tty_path);
185 }
186
187 static bool is_terminal_output(ExecOutput o) {
188         return
189                 o == EXEC_OUTPUT_TTY ||
190                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
191                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
192                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
193 }
194
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and
 * installs it as descriptor nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: open() already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return r;
}
212
213 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
214         union sockaddr_union sa = {
215                 .un.sun_family = AF_UNIX,
216                 .un.sun_path = "/run/systemd/journal/stdout",
217         };
218         uid_t olduid = UID_INVALID;
219         gid_t oldgid = GID_INVALID;
220         int r;
221
222         if (gid != GID_INVALID) {
223                 oldgid = getgid();
224
225                 r = setegid(gid);
226                 if (r < 0)
227                         return -errno;
228         }
229
230         if (uid != UID_INVALID) {
231                 olduid = getuid();
232
233                 r = seteuid(uid);
234                 if (r < 0) {
235                         r = -errno;
236                         goto restore_gid;
237                 }
238         }
239
240         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
241         if (r < 0)
242                 r = -errno;
243
244         /* If we fail to restore the uid or gid, things will likely
245            fail later on. This should only happen if an LSM interferes. */
246
247         if (uid != UID_INVALID)
248                 (void) seteuid(olduid);
249
250  restore_gid:
251         if (gid != GID_INVALID)
252                 (void) setegid(oldgid);
253
254         return r;
255 }
256
257 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
258         int fd, r;
259
260         assert(context);
261         assert(output < _EXEC_OUTPUT_MAX);
262         assert(ident);
263         assert(nfd >= 0);
264
265         fd = socket(AF_UNIX, SOCK_STREAM, 0);
266         if (fd < 0)
267                 return -errno;
268
269         r = connect_journal_socket(fd, uid, gid);
270         if (r < 0)
271                 return r;
272
273         if (shutdown(fd, SHUT_RD) < 0) {
274                 safe_close(fd);
275                 return -errno;
276         }
277
278         fd_inc_sndbuf(fd, SNDBUF_SIZE);
279
280         dprintf(fd,
281                 "%s\n"
282                 "%s\n"
283                 "%i\n"
284                 "%i\n"
285                 "%i\n"
286                 "%i\n"
287                 "%i\n",
288                 context->syslog_identifier ? context->syslog_identifier : ident,
289                 unit_id,
290                 context->syslog_priority,
291                 !!context->syslog_level_prefix,
292                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
293                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
294                 is_terminal_output(output));
295
296         if (fd != nfd) {
297                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
298                 safe_close(fd);
299         } else
300                 r = nfd;
301
302         return r;
303 }
/* Opens the terminal at path via open_terminal() (mode is OR-ed with
 * O_NOCTTY) and installs it as descriptor nfd.
 *
 * Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already at the requested number, nothing to duplicate */
        if (term_fd == nfd)
                return nfd;

        r = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        safe_close(term_fd);

        return r;
}
321
322 static bool is_terminal_input(ExecInput i) {
323         return
324                 i == EXEC_INPUT_TTY ||
325                 i == EXEC_INPUT_TTY_FORCE ||
326                 i == EXEC_INPUT_TTY_FAIL;
327 }
328
329 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
330
331         if (is_terminal_input(std_input) && !apply_tty_stdin)
332                 return EXEC_INPUT_NULL;
333
334         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
335                 return EXEC_INPUT_NULL;
336
337         return std_input;
338 }
339
340 static int fixup_output(ExecOutput std_output, int socket_fd) {
341
342         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
343                 return EXEC_OUTPUT_INHERIT;
344
345         return std_output;
346 }
347
348 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
349         ExecInput i;
350
351         assert(context);
352
353         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
354
355         switch (i) {
356
357         case EXEC_INPUT_NULL:
358                 return open_null_as(O_RDONLY, STDIN_FILENO);
359
360         case EXEC_INPUT_TTY:
361         case EXEC_INPUT_TTY_FORCE:
362         case EXEC_INPUT_TTY_FAIL: {
363                 int fd, r;
364
365                 fd = acquire_terminal(tty_path(context),
366                                       i == EXEC_INPUT_TTY_FAIL,
367                                       i == EXEC_INPUT_TTY_FORCE,
368                                       false,
369                                       USEC_INFINITY);
370                 if (fd < 0)
371                         return fd;
372
373                 if (fd != STDIN_FILENO) {
374                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
375                         safe_close(fd);
376                 } else
377                         r = STDIN_FILENO;
378
379                 return r;
380         }
381
382         case EXEC_INPUT_SOCKET:
383                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
384
385         default:
386                 assert_not_reached("Unknown input type");
387         }
388 }
389
390 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
391         ExecOutput o;
392         ExecInput i;
393         int r;
394
395         assert(context);
396         assert(ident);
397
398         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
399         o = fixup_output(context->std_output, socket_fd);
400
401         if (fileno == STDERR_FILENO) {
402                 ExecOutput e;
403                 e = fixup_output(context->std_error, socket_fd);
404
405                 /* This expects the input and output are already set up */
406
407                 /* Don't change the stderr file descriptor if we inherit all
408                  * the way and are not on a tty */
409                 if (e == EXEC_OUTPUT_INHERIT &&
410                     o == EXEC_OUTPUT_INHERIT &&
411                     i == EXEC_INPUT_NULL &&
412                     !is_terminal_input(context->std_input) &&
413                     getppid () != 1)
414                         return fileno;
415
416                 /* Duplicate from stdout if possible */
417                 if (e == o || e == EXEC_OUTPUT_INHERIT)
418                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
419
420                 o = e;
421
422         } else if (o == EXEC_OUTPUT_INHERIT) {
423                 /* If input got downgraded, inherit the original value */
424                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
425                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
426
427                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
428                 if (i != EXEC_INPUT_NULL)
429                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
430
431                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
432                 if (getppid() != 1)
433                         return fileno;
434
435                 /* We need to open /dev/null here anew, to get the right access mode. */
436                 return open_null_as(O_WRONLY, fileno);
437         }
438
439         switch (o) {
440
441         case EXEC_OUTPUT_NULL:
442                 return open_null_as(O_WRONLY, fileno);
443
444         case EXEC_OUTPUT_TTY:
445                 if (is_terminal_input(i))
446                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
447
448                 /* We don't reset the terminal if this is just about output */
449                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
450
451         case EXEC_OUTPUT_SYSLOG:
452         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
453         case EXEC_OUTPUT_KMSG:
454         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
455         case EXEC_OUTPUT_JOURNAL:
456         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
457                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
458                 if (r < 0) {
459                         log_unit_struct(unit_id,
460                                         LOG_ERR,
461                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
462                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
463                                                     unit_id, strerror(-r)),
464                                         LOG_ERRNO(-r),
465                                         NULL);
466                         r = open_null_as(O_WRONLY, fileno);
467                 }
468                 return r;
469
470         case EXEC_OUTPUT_SOCKET:
471                 assert(socket_fd >= 0);
472                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
473
474         default:
475                 assert_not_reached("Unknown error type");
476         }
477 }
478
479 static int chown_terminal(int fd, uid_t uid) {
480         struct stat st;
481
482         assert(fd >= 0);
483
484         /* This might fail. What matters are the results. */
485         (void) fchown(fd, uid, -1);
486         (void) fchmod(fd, TTY_MODE);
487
488         if (fstat(fd, &st) < 0)
489                 return -errno;
490
491         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
492                 return -EPERM;
493
494         return 0;
495 }
496
497 static int setup_confirm_stdio(int *_saved_stdin,
498                                int *_saved_stdout) {
499         int fd = -1, saved_stdin, saved_stdout = -1, r;
500
501         assert(_saved_stdin);
502         assert(_saved_stdout);
503
504         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
505         if (saved_stdin < 0)
506                 return -errno;
507
508         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
509         if (saved_stdout < 0) {
510                 r = errno;
511                 goto fail;
512         }
513
514         fd = acquire_terminal(
515                         "/dev/console",
516                         false,
517                         false,
518                         false,
519                         DEFAULT_CONFIRM_USEC);
520         if (fd < 0) {
521                 r = fd;
522                 goto fail;
523         }
524
525         r = chown_terminal(fd, getuid());
526         if (r < 0)
527                 goto fail;
528
529         if (dup2(fd, STDIN_FILENO) < 0) {
530                 r = -errno;
531                 goto fail;
532         }
533
534         if (dup2(fd, STDOUT_FILENO) < 0) {
535                 r = -errno;
536                 goto fail;
537         }
538
539         if (fd >= 2)
540                 safe_close(fd);
541
542         *_saved_stdin = saved_stdin;
543         *_saved_stdout = saved_stdout;
544
545         return 0;
546
547 fail:
548         safe_close(saved_stdout);
549         safe_close(saved_stdin);
550         safe_close(fd);
551
552         return r;
553 }
554
555 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
556         _cleanup_close_ int fd = -1;
557         va_list ap;
558
559         assert(format);
560
561         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
562         if (fd < 0)
563                 return fd;
564
565         va_start(ap, format);
566         vdprintf(fd, format, ap);
567         va_end(ap);
568
569         return 0;
570 }
571
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back onto stdin/stdout and closes the saved
 * copies.
 *
 * Returns 0 on success, otherwise the negative errno of the last
 * dup2() that failed. Both descriptors are processed in any case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
595
596 static int ask_for_confirmation(char *response, char **argv) {
597         int saved_stdout = -1, saved_stdin = -1, r;
598         _cleanup_free_ char *line = NULL;
599
600         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
601         if (r < 0)
602                 return r;
603
604         line = exec_command_line(argv);
605         if (!line)
606                 return -ENOMEM;
607
608         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
609
610         restore_confirm_stdio(&saved_stdin, &saved_stdout);
611
612         return r;
613 }
614
615 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
616         bool keep_groups = false;
617         int r;
618
619         assert(context);
620
621         /* Lookup and set GID and supplementary group list. Here too
622          * we avoid NSS lookups for gid=0. */
623
624         if (context->group || username) {
625
626                 if (context->group) {
627                         const char *g = context->group;
628
629                         if ((r = get_group_creds(&g, &gid)) < 0)
630                                 return r;
631                 }
632
633                 /* First step, initialize groups from /etc/groups */
634                 if (username && gid != 0) {
635                         if (initgroups(username, gid) < 0)
636                                 return -errno;
637
638                         keep_groups = true;
639                 }
640
641                 /* Second step, set our gids */
642                 if (setresgid(gid, gid, gid) < 0)
643                         return -errno;
644         }
645
646         if (context->supplementary_groups) {
647                 int ngroups_max, k;
648                 gid_t *gids;
649                 char **i;
650
651                 /* Final step, initialize any manually set supplementary groups */
652                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
653
654                 if (!(gids = new(gid_t, ngroups_max)))
655                         return -ENOMEM;
656
657                 if (keep_groups) {
658                         if ((k = getgroups(ngroups_max, gids)) < 0) {
659                                 free(gids);
660                                 return -errno;
661                         }
662                 } else
663                         k = 0;
664
665                 STRV_FOREACH(i, context->supplementary_groups) {
666                         const char *g;
667
668                         if (k >= ngroups_max) {
669                                 free(gids);
670                                 return -E2BIG;
671                         }
672
673                         g = *i;
674                         r = get_group_creds(&g, gids+k);
675                         if (r < 0) {
676                                 free(gids);
677                                 return r;
678                         }
679
680                         k++;
681                 }
682
683                 if (setgroups(k, gids) < 0) {
684                         free(gids);
685                         return -errno;
686                 }
687
688                 free(gids);
689         }
690
691         return 0;
692 }
693
694 static int enforce_user(const ExecContext *context, uid_t uid) {
695         assert(context);
696
697         /* Sets (but doesn't lookup) the uid and make sure we keep the
698          * capabilities while doing so. */
699
700         if (context->capabilities) {
701                 _cleanup_cap_free_ cap_t d = NULL;
702                 static const cap_value_t bits[] = {
703                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
704                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
705                 };
706
707                 /* First step: If we need to keep capabilities but
708                  * drop privileges we need to make sure we keep our
709                  * caps, while we drop privileges. */
710                 if (uid != 0) {
711                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
712
713                         if (prctl(PR_GET_SECUREBITS) != sb)
714                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
715                                         return -errno;
716                 }
717
718                 /* Second step: set the capabilities. This will reduce
719                  * the capabilities to the minimum we need. */
720
721                 d = cap_dup(context->capabilities);
722                 if (!d)
723                         return -errno;
724
725                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
726                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
727                         return -errno;
728
729                 if (cap_set_proc(d) < 0)
730                         return -errno;
731         }
732
733         /* Third step: actually set the uids */
734         if (setresuid(uid, uid, uid) < 0)
735                 return -errno;
736
737         /* At this point we should have all necessary capabilities but
738            are otherwise a normal user. However, the caps might got
739            corrupted due to the setresuid() so we need clean them up
740            later. This is done outside of this call. */
741
742         return 0;
743 }
744
745 #ifdef HAVE_PAM
746
747 static int null_conv(
748                 int num_msg,
749                 const struct pam_message **msg,
750                 struct pam_response **resp,
751                 void *appdata_ptr) {
752
753         /* We don't support conversations */
754
755         return PAM_CONV_ERR;
756 }
757
758 static int setup_pam(
759                 const char *name,
760                 const char *user,
761                 uid_t uid,
762                 const char *tty,
763                 char ***pam_env,
764                 int fds[], unsigned n_fds) {
765
766         static const struct pam_conv conv = {
767                 .conv = null_conv,
768                 .appdata_ptr = NULL
769         };
770
771         pam_handle_t *handle = NULL;
772         sigset_t ss, old_ss;
773         int pam_code = PAM_SUCCESS;
774         int err;
775         char **e = NULL;
776         bool close_session = false;
777         pid_t pam_pid = 0, parent_pid;
778         int flags = 0;
779
780         assert(name);
781         assert(user);
782         assert(pam_env);
783
784         /* We set up PAM in the parent process, then fork. The child
785          * will then stay around until killed via PR_GET_PDEATHSIG or
786          * systemd via the cgroup logic. It will then remove the PAM
787          * session again. The parent process will exec() the actual
788          * daemon. We do things this way to ensure that the main PID
789          * of the daemon is the one we initially fork()ed. */
790
791         if (log_get_max_level() < LOG_DEBUG)
792                 flags |= PAM_SILENT;
793
794         pam_code = pam_start(name, user, &conv, &handle);
795         if (pam_code != PAM_SUCCESS) {
796                 handle = NULL;
797                 goto fail;
798         }
799
800         if (tty) {
801                 pam_code = pam_set_item(handle, PAM_TTY, tty);
802                 if (pam_code != PAM_SUCCESS)
803                         goto fail;
804         }
805
806         pam_code = pam_acct_mgmt(handle, flags);
807         if (pam_code != PAM_SUCCESS)
808                 goto fail;
809
810         pam_code = pam_open_session(handle, flags);
811         if (pam_code != PAM_SUCCESS)
812                 goto fail;
813
814         close_session = true;
815
816         e = pam_getenvlist(handle);
817         if (!e) {
818                 pam_code = PAM_BUF_ERR;
819                 goto fail;
820         }
821
822         /* Block SIGTERM, so that we know that it won't get lost in
823          * the child */
824         if (sigemptyset(&ss) < 0 ||
825             sigaddset(&ss, SIGTERM) < 0 ||
826             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
827                 goto fail;
828
829         parent_pid = getpid();
830
831         pam_pid = fork();
832         if (pam_pid < 0)
833                 goto fail;
834
835         if (pam_pid == 0) {
836                 int sig;
837                 int r = EXIT_PAM;
838
839                 /* The child's job is to reset the PAM session on
840                  * termination */
841
842                 /* This string must fit in 10 chars (i.e. the length
843                  * of "/sbin/init"), to look pretty in /bin/ps */
844                 rename_process("(sd-pam)");
845
846                 /* Make sure we don't keep open the passed fds in this
847                 child. We assume that otherwise only those fds are
848                 open here that have been opened by PAM. */
849                 close_many(fds, n_fds);
850
851                 /* Drop privileges - we don't need any to pam_close_session
852                  * and this will make PR_SET_PDEATHSIG work in most cases.
853                  * If this fails, ignore the error - but expect sd-pam threads
854                  * to fail to exit normally */
855                 if (setresuid(uid, uid, uid) < 0)
856                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
857
858                 /* Wait until our parent died. This will only work if
859                  * the above setresuid() succeeds, otherwise the kernel
860                  * will not allow unprivileged parents kill their privileged
861                  * children this way. We rely on the control groups kill logic
862                  * to do the rest for us. */
863                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
864                         goto child_finish;
865
866                 /* Check if our parent process might already have
867                  * died? */
868                 if (getppid() == parent_pid) {
869                         for (;;) {
870                                 if (sigwait(&ss, &sig) < 0) {
871                                         if (errno == EINTR)
872                                                 continue;
873
874                                         goto child_finish;
875                                 }
876
877                                 assert(sig == SIGTERM);
878                                 break;
879                         }
880                 }
881
882                 /* If our parent died we'll end the session */
883                 if (getppid() != parent_pid) {
884                         pam_code = pam_close_session(handle, flags);
885                         if (pam_code != PAM_SUCCESS)
886                                 goto child_finish;
887                 }
888
889                 r = 0;
890
891         child_finish:
892                 pam_end(handle, pam_code | flags);
893                 _exit(r);
894         }
895
896         /* If the child was forked off successfully it will do all the
897          * cleanups, so forget about the handle here. */
898         handle = NULL;
899
900         /* Unblock SIGTERM again in the parent */
901         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
902                 goto fail;
903
904         /* We close the log explicitly here, since the PAM modules
905          * might have opened it, but we don't want this fd around. */
906         closelog();
907
908         *pam_env = e;
909         e = NULL;
910
911         return 0;
912
913 fail:
914         if (pam_code != PAM_SUCCESS) {
915                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
916                 err = -EPERM;  /* PAM errors do not map to errno */
917         } else {
918                 log_error_errno(errno, "PAM failed: %m");
919                 err = -errno;
920         }
921
922         if (handle) {
923                 if (close_session)
924                         pam_code = pam_close_session(handle, flags);
925
926                 pam_end(handle, pam_code | flags);
927         }
928
929         strv_free(e);
930
931         closelog();
932
933         if (pam_pid > 1) {
934                 kill(pam_pid, SIGTERM);
935                 kill(pam_pid, SIGCONT);
936         }
937
938         return err;
939 }
940 #endif
941
/* Renames the current process to "(<name>)", where <name> is the last
 * (at most) 8 characters of the base name of path. If the base name is
 * empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        /* The result has to fit into 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps: 8 characters of
         * the name plus the two parentheses. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail rather than the head: the beginning
                 * is often just a common prefix such as "systemd-" */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
972
973 #ifdef HAVE_SECCOMP
974
975 static int apply_seccomp(const ExecContext *c) {
976         uint32_t negative_action, action;
977         scmp_filter_ctx *seccomp;
978         Iterator i;
979         void *id;
980         int r;
981
982         assert(c);
983
984         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
985
986         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
987         if (!seccomp)
988                 return -ENOMEM;
989
990         if (c->syscall_archs) {
991
992                 SET_FOREACH(id, c->syscall_archs, i) {
993                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
994                         if (r == -EEXIST)
995                                 continue;
996                         if (r < 0)
997                                 goto finish;
998                 }
999
1000         } else {
1001                 r = seccomp_add_secondary_archs(seccomp);
1002                 if (r < 0)
1003                         goto finish;
1004         }
1005
1006         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1007         SET_FOREACH(id, c->syscall_filter, i) {
1008                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1009                 if (r < 0)
1010                         goto finish;
1011         }
1012
1013         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1014         if (r < 0)
1015                 goto finish;
1016
1017         r = seccomp_load(seccomp);
1018
1019 finish:
1020         seccomp_release(seccomp);
1021         return r;
1022 }
1023
1024 static int apply_address_families(const ExecContext *c) {
1025         scmp_filter_ctx *seccomp;
1026         Iterator i;
1027         int r;
1028
1029         assert(c);
1030
1031         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1032         if (!seccomp)
1033                 return -ENOMEM;
1034
1035         r = seccomp_add_secondary_archs(seccomp);
1036         if (r < 0)
1037                 goto finish;
1038
1039         if (c->address_families_whitelist) {
1040                 int af, first = 0, last = 0;
1041                 void *afp;
1042
1043                 /* If this is a whitelist, we first block the address
1044                  * families that are out of range and then everything
1045                  * that is not in the set. First, we find the lowest
1046                  * and highest address family in the set. */
1047
1048                 SET_FOREACH(afp, c->address_families, i) {
1049                         af = PTR_TO_INT(afp);
1050
1051                         if (af <= 0 || af >= af_max())
1052                                 continue;
1053
1054                         if (first == 0 || af < first)
1055                                 first = af;
1056
1057                         if (last == 0 || af > last)
1058                                 last = af;
1059                 }
1060
1061                 assert((first == 0) == (last == 0));
1062
1063                 if (first == 0) {
1064
1065                         /* No entries in the valid range, block everything */
1066                         r = seccomp_rule_add(
1067                                         seccomp,
1068                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1069                                         SCMP_SYS(socket),
1070                                         0);
1071                         if (r < 0)
1072                                 goto finish;
1073
1074                 } else {
1075
1076                         /* Block everything below the first entry */
1077                         r = seccomp_rule_add(
1078                                         seccomp,
1079                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1080                                         SCMP_SYS(socket),
1081                                         1,
1082                                         SCMP_A0(SCMP_CMP_LT, first));
1083                         if (r < 0)
1084                                 goto finish;
1085
1086                         /* Block everything above the last entry */
1087                         r = seccomp_rule_add(
1088                                         seccomp,
1089                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1090                                         SCMP_SYS(socket),
1091                                         1,
1092                                         SCMP_A0(SCMP_CMP_GT, last));
1093                         if (r < 0)
1094                                 goto finish;
1095
1096                         /* Block everything between the first and last
1097                          * entry */
1098                         for (af = 1; af < af_max(); af++) {
1099
1100                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1101                                         continue;
1102
1103                                 r = seccomp_rule_add(
1104                                                 seccomp,
1105                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1106                                                 SCMP_SYS(socket),
1107                                                 1,
1108                                                 SCMP_A0(SCMP_CMP_EQ, af));
1109                                 if (r < 0)
1110                                         goto finish;
1111                         }
1112                 }
1113
1114         } else {
1115                 void *af;
1116
1117                 /* If this is a blacklist, then generate one rule for
1118                  * each address family that are then combined in OR
1119                  * checks. */
1120
1121                 SET_FOREACH(af, c->address_families, i) {
1122
1123                         r = seccomp_rule_add(
1124                                         seccomp,
1125                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1126                                         SCMP_SYS(socket),
1127                                         1,
1128                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1129                         if (r < 0)
1130                                 goto finish;
1131                 }
1132         }
1133
1134         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1135         if (r < 0)
1136                 goto finish;
1137
1138         r = seccomp_load(seccomp);
1139
1140 finish:
1141         seccomp_release(seccomp);
1142         return r;
1143 }
1144
1145 #endif
1146
1147 static void do_idle_pipe_dance(int idle_pipe[4]) {
1148         assert(idle_pipe);
1149
1150
1151         safe_close(idle_pipe[1]);
1152         safe_close(idle_pipe[2]);
1153
1154         if (idle_pipe[0] >= 0) {
1155                 int r;
1156
1157                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1158
1159                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1160                         /* Signal systemd that we are bored and want to continue. */
1161                         r = write(idle_pipe[3], "x", 1);
1162                         if (r > 0)
1163                                 /* Wait for systemd to react to the signal above. */
1164                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1165                 }
1166
1167                 safe_close(idle_pipe[0]);
1168
1169         }
1170
1171         safe_close(idle_pipe[3]);
1172 }
1173
1174 static int build_environment(
1175                 const ExecContext *c,
1176                 unsigned n_fds,
1177                 usec_t watchdog_usec,
1178                 const char *home,
1179                 const char *username,
1180                 const char *shell,
1181                 char ***ret) {
1182
1183         _cleanup_strv_free_ char **our_env = NULL;
1184         unsigned n_env = 0;
1185         char *x;
1186
1187         assert(c);
1188         assert(ret);
1189
1190         our_env = new0(char*, 10);
1191         if (!our_env)
1192                 return -ENOMEM;
1193
1194         if (n_fds > 0) {
1195                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1196                         return -ENOMEM;
1197                 our_env[n_env++] = x;
1198
1199                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1200                         return -ENOMEM;
1201                 our_env[n_env++] = x;
1202         }
1203
1204         if (watchdog_usec > 0) {
1205                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1206                         return -ENOMEM;
1207                 our_env[n_env++] = x;
1208
1209                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1210                         return -ENOMEM;
1211                 our_env[n_env++] = x;
1212         }
1213
1214         if (home) {
1215                 x = strappend("HOME=", home);
1216                 if (!x)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         if (username) {
1222                 x = strappend("LOGNAME=", username);
1223                 if (!x)
1224                         return -ENOMEM;
1225                 our_env[n_env++] = x;
1226
1227                 x = strappend("USER=", username);
1228                 if (!x)
1229                         return -ENOMEM;
1230                 our_env[n_env++] = x;
1231         }
1232
1233         if (shell) {
1234                 x = strappend("SHELL=", shell);
1235                 if (!x)
1236                         return -ENOMEM;
1237                 our_env[n_env++] = x;
1238         }
1239
1240         if (is_terminal_input(c->std_input) ||
1241             c->std_output == EXEC_OUTPUT_TTY ||
1242             c->std_error == EXEC_OUTPUT_TTY ||
1243             c->tty_path) {
1244
1245                 x = strdup(default_term_for_tty(tty_path(c)));
1246                 if (!x)
1247                         return -ENOMEM;
1248                 our_env[n_env++] = x;
1249         }
1250
1251         our_env[n_env++] = NULL;
1252         assert(n_env <= 10);
1253
1254         *ret = our_env;
1255         our_env = NULL;
1256
1257         return 0;
1258 }
1259
1260 static int exec_child(
1261                 ExecCommand *command,
1262                 const ExecContext *context,
1263                 const ExecParameters *params,
1264                 ExecRuntime *runtime,
1265                 char **argv,
1266                 int socket_fd,
1267                 int *fds, unsigned n_fds,
1268                 char **files_env,
1269                 int *exit_status) {
1270
1271         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1272         _cleanup_free_ char *mac_selinux_context_net = NULL;
1273         const char *username = NULL, *home = NULL, *shell = NULL;
1274         unsigned n_dont_close = 0;
1275         int dont_close[n_fds + 4];
1276         uid_t uid = UID_INVALID;
1277         gid_t gid = GID_INVALID;
1278         int i, r;
1279
1280         assert(command);
1281         assert(context);
1282         assert(params);
1283         assert(exit_status);
1284
1285         rename_process_from_path(command->path);
1286
1287         /* We reset exactly these signals, since they are the
1288          * only ones we set to SIG_IGN in the main daemon. All
1289          * others we leave untouched because we set them to
1290          * SIG_DFL or a valid handler initially, both of which
1291          * will be demoted to SIG_DFL. */
1292         default_signals(SIGNALS_CRASH_HANDLER,
1293                         SIGNALS_IGNORE, -1);
1294
1295         if (context->ignore_sigpipe)
1296                 ignore_signals(SIGPIPE, -1);
1297
1298         r = reset_signal_mask();
1299         if (r < 0) {
1300                 *exit_status = EXIT_SIGNAL_MASK;
1301                 return r;
1302         }
1303
1304         if (params->idle_pipe)
1305                 do_idle_pipe_dance(params->idle_pipe);
1306
1307         /* Close sockets very early to make sure we don't
1308          * block init reexecution because it cannot bind its
1309          * sockets */
1310
1311         log_forget_fds();
1312
1313         if (socket_fd >= 0)
1314                 dont_close[n_dont_close++] = socket_fd;
1315         if (n_fds > 0) {
1316                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1317                 n_dont_close += n_fds;
1318         }
1319         if (params->bus_endpoint_fd >= 0)
1320                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1321         if (runtime) {
1322                 if (runtime->netns_storage_socket[0] >= 0)
1323                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1324                 if (runtime->netns_storage_socket[1] >= 0)
1325                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1326         }
1327
1328         r = close_all_fds(dont_close, n_dont_close);
1329         if (r < 0) {
1330                 *exit_status = EXIT_FDS;
1331                 return r;
1332         }
1333
1334         if (!context->same_pgrp)
1335                 if (setsid() < 0) {
1336                         *exit_status = EXIT_SETSID;
1337                         return -errno;
1338                 }
1339
1340         exec_context_tty_reset(context);
1341
1342         if (params->confirm_spawn) {
1343                 char response;
1344
1345                 r = ask_for_confirmation(&response, argv);
1346                 if (r == -ETIMEDOUT)
1347                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1348                 else if (r < 0)
1349                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1350                 else if (response == 's') {
1351                         write_confirm_message("Skipping execution.\n");
1352                         *exit_status = EXIT_CONFIRM;
1353                         return -ECANCELED;
1354                 } else if (response == 'n') {
1355                         write_confirm_message("Failing execution.\n");
1356                         *exit_status = 0;
1357                         return 0;
1358                 }
1359         }
1360
1361         if (context->user) {
1362                 username = context->user;
1363                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1364                 if (r < 0) {
1365                         *exit_status = EXIT_USER;
1366                         return r;
1367                 }
1368         }
1369
1370         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1371          * must make sure to drop O_NONBLOCK */
1372         if (socket_fd >= 0)
1373                 fd_nonblock(socket_fd, false);
1374
1375         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1376         if (r < 0) {
1377                 *exit_status = EXIT_STDIN;
1378                 return r;
1379         }
1380
1381         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1382         if (r < 0) {
1383                 *exit_status = EXIT_STDOUT;
1384                 return r;
1385         }
1386
1387         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1388         if (r < 0) {
1389                 *exit_status = EXIT_STDERR;
1390                 return r;
1391         }
1392
1393         if (params->cgroup_path) {
1394                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1395                 if (r < 0) {
1396                         *exit_status = EXIT_CGROUP;
1397                         return r;
1398                 }
1399         }
1400
1401         if (context->oom_score_adjust_set) {
1402                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1403
1404                 /* When we can't make this change due to EPERM, then
1405                  * let's silently skip over it. User namespaces
1406                  * prohibit write access to this file, and we
1407                  * shouldn't trip up over that. */
1408
1409                 sprintf(t, "%i", context->oom_score_adjust);
1410                 r = write_string_file("/proc/self/oom_score_adj", t);
1411                 if (r == -EPERM || r == -EACCES) {
1412                         log_open();
1413                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1414                         log_close();
1415                 } else if (r < 0) {
1416                         *exit_status = EXIT_OOM_ADJUST;
1417                         return -errno;
1418                 }
1419         }
1420
1421         if (context->nice_set)
1422                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1423                         *exit_status = EXIT_NICE;
1424                         return -errno;
1425                 }
1426
1427         if (context->cpu_sched_set) {
1428                 struct sched_param param = {
1429                         .sched_priority = context->cpu_sched_priority,
1430                 };
1431
1432                 r = sched_setscheduler(0,
1433                                        context->cpu_sched_policy |
1434                                        (context->cpu_sched_reset_on_fork ?
1435                                         SCHED_RESET_ON_FORK : 0),
1436                                        &param);
1437                 if (r < 0) {
1438                         *exit_status = EXIT_SETSCHEDULER;
1439                         return -errno;
1440                 }
1441         }
1442
1443         if (context->cpuset)
1444                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1445                         *exit_status = EXIT_CPUAFFINITY;
1446                         return -errno;
1447                 }
1448
1449         if (context->ioprio_set)
1450                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1451                         *exit_status = EXIT_IOPRIO;
1452                         return -errno;
1453                 }
1454
1455         if (context->timer_slack_nsec != NSEC_INFINITY)
1456                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1457                         *exit_status = EXIT_TIMERSLACK;
1458                         return -errno;
1459                 }
1460
1461         if (context->personality != 0xffffffffUL)
1462                 if (personality(context->personality) < 0) {
1463                         *exit_status = EXIT_PERSONALITY;
1464                         return -errno;
1465                 }
1466
1467         if (context->utmp_id)
1468                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1469
1470         if (context->user && is_terminal_input(context->std_input)) {
1471                 r = chown_terminal(STDIN_FILENO, uid);
1472                 if (r < 0) {
1473                         *exit_status = EXIT_STDIN;
1474                         return r;
1475                 }
1476         }
1477
1478 #ifdef ENABLE_KDBUS
1479         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1480                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1481
1482                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1483                 if (r < 0) {
1484                         *exit_status = EXIT_BUS_ENDPOINT;
1485                         return r;
1486                 }
1487         }
1488 #endif
1489
1490         /* If delegation is enabled we'll pass ownership of the cgroup
1491          * (but only in systemd's own controller hierarchy!) to the
1492          * user of the new process. */
1493         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1494                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1495                 if (r < 0) {
1496                         *exit_status = EXIT_CGROUP;
1497                         return r;
1498                 }
1499
1500
1501                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1502                 if (r < 0) {
1503                         *exit_status = EXIT_CGROUP;
1504                         return r;
1505                 }
1506         }
1507
1508         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1509                 char **rt;
1510
1511                 STRV_FOREACH(rt, context->runtime_directory) {
1512                         _cleanup_free_ char *p;
1513
1514                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1515                         if (!p) {
1516                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1517                                 return -ENOMEM;
1518                         }
1519
1520                         r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1521                         if (r < 0) {
1522                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1523                                 return r;
1524                         }
1525                 }
1526         }
1527
1528         if (params->apply_permissions) {
1529                 r = enforce_groups(context, username, gid);
1530                 if (r < 0) {
1531                         *exit_status = EXIT_GROUP;
1532                         return r;
1533                 }
1534         }
1535
1536         umask(context->umask);
1537
1538 #ifdef HAVE_PAM
1539         if (params->apply_permissions && context->pam_name && username) {
1540                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1541                 if (r < 0) {
1542                         *exit_status = EXIT_PAM;
1543                         return r;
1544                 }
1545         }
1546 #endif
1547
1548         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1549                 r = setup_netns(runtime->netns_storage_socket);
1550                 if (r < 0) {
1551                         *exit_status = EXIT_NETWORK;
1552                         return r;
1553                 }
1554         }
1555
1556         if (!strv_isempty(context->read_write_dirs) ||
1557             !strv_isempty(context->read_only_dirs) ||
1558             !strv_isempty(context->inaccessible_dirs) ||
1559             context->mount_flags != 0 ||
1560             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1561             params->bus_endpoint_path ||
1562             context->private_devices ||
1563             context->protect_system != PROTECT_SYSTEM_NO ||
1564             context->protect_home != PROTECT_HOME_NO) {
1565
1566                 char *tmp = NULL, *var = NULL;
1567
1568                 /* The runtime struct only contains the parent
1569                  * of the private /tmp, which is
1570                  * non-accessible to world users. Inside of it
1571                  * there's a /tmp that is sticky, and that's
1572                  * the one we want to use here. */
1573
1574                 if (context->private_tmp && runtime) {
1575                         if (runtime->tmp_dir)
1576                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1577                         if (runtime->var_tmp_dir)
1578                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1579                 }
1580
1581                 r = setup_namespace(
1582                                 context->read_write_dirs,
1583                                 context->read_only_dirs,
1584                                 context->inaccessible_dirs,
1585                                 tmp,
1586                                 var,
1587                                 params->bus_endpoint_path,
1588                                 context->private_devices,
1589                                 context->protect_home,
1590                                 context->protect_system,
1591                                 context->mount_flags);
1592
1593                 /* If we couldn't set up the namespace this is
1594                  * probably due to a missing capability. In this case,
1595                  * silently proceed. */
1596                 if (r == -EPERM || r == -EACCES) {
1597                         log_open();
1598                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1599                         log_close();
1600                 } else if (r < 0) {
1601                         *exit_status = EXIT_NAMESPACE;
1602                         return r;
1603                 }
1604         }
1605
1606         if (params->apply_chroot) {
1607                 if (context->root_directory)
1608                         if (chroot(context->root_directory) < 0) {
1609                                 *exit_status = EXIT_CHROOT;
1610                                 return -errno;
1611                         }
1612
1613                 if (chdir(context->working_directory ?: "/") < 0 &&
1614                     !context->working_directory_missing_ok) {
1615                         *exit_status = EXIT_CHDIR;
1616                         return -errno;
1617                 }
1618         } else {
1619                 _cleanup_free_ char *d = NULL;
1620
1621                 if (asprintf(&d, "%s/%s",
1622                              context->root_directory ?: "",
1623                              context->working_directory ?: "") < 0) {
1624                         *exit_status = EXIT_MEMORY;
1625                         return -ENOMEM;
1626                 }
1627
1628                 if (chdir(d) < 0 &&
1629                     !context->working_directory_missing_ok) {
1630                         *exit_status = EXIT_CHDIR;
1631                         return -errno;
1632                 }
1633         }
1634
1635 #ifdef HAVE_SELINUX
1636         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1637                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1638                 if (r < 0) {
1639                         *exit_status = EXIT_SELINUX_CONTEXT;
1640                         return r;
1641                 }
1642         }
1643 #endif
1644
1645         /* We repeat the fd closing here, to make sure that
1646          * nothing is leaked from the PAM modules. Note that
1647          * we are more aggressive this time since socket_fd
1648          * and the netns fds we don't need anymore. The custom
1649          * endpoint fd was needed to upload the policy and can
1650          * now be closed as well. */
1651         r = close_all_fds(fds, n_fds);
1652         if (r >= 0)
1653                 r = shift_fds(fds, n_fds);
1654         if (r >= 0)
1655                 r = flags_fds(fds, n_fds, context->non_blocking);
1656         if (r < 0) {
1657                 *exit_status = EXIT_FDS;
1658                 return r;
1659         }
1660
1661         if (params->apply_permissions) {
1662
1663                 for (i = 0; i < _RLIMIT_MAX; i++) {
1664                         if (!context->rlimit[i])
1665                                 continue;
1666
1667                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1668                                 *exit_status = EXIT_LIMITS;
1669                                 return -errno;
1670                         }
1671                 }
1672
1673                 if (context->capability_bounding_set_drop) {
1674                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1675                         if (r < 0) {
1676                                 *exit_status = EXIT_CAPABILITIES;
1677                                 return r;
1678                         }
1679                 }
1680
1681 #ifdef HAVE_SMACK
1682                 if (context->smack_process_label) {
1683                         r = mac_smack_apply_pid(0, context->smack_process_label);
1684                         if (r < 0) {
1685                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1686                                 return r;
1687                         }
1688                 }
1689 #endif
1690
1691                 if (context->user) {
1692                         r = enforce_user(context, uid);
1693                         if (r < 0) {
1694                                 *exit_status = EXIT_USER;
1695                                 return r;
1696                         }
1697                 }
1698
1699                 /* PR_GET_SECUREBITS is not privileged, while
1700                  * PR_SET_SECUREBITS is. So to suppress
1701                  * potential EPERMs we'll try not to call
1702                  * PR_SET_SECUREBITS unless necessary. */
1703                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1704                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1705                                 *exit_status = EXIT_SECUREBITS;
1706                                 return -errno;
1707                         }
1708
1709                 if (context->capabilities)
1710                         if (cap_set_proc(context->capabilities) < 0) {
1711                                 *exit_status = EXIT_CAPABILITIES;
1712                                 return -errno;
1713                         }
1714
1715                 if (context->no_new_privileges)
1716                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1717                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1718                                 return -errno;
1719                         }
1720
1721 #ifdef HAVE_SECCOMP
1722                 if (context->address_families_whitelist ||
1723                     !set_isempty(context->address_families)) {
1724                         r = apply_address_families(context);
1725                         if (r < 0) {
1726                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1727                                 return r;
1728                         }
1729                 }
1730
1731                 if (context->syscall_whitelist ||
1732                     !set_isempty(context->syscall_filter) ||
1733                     !set_isempty(context->syscall_archs)) {
1734                         r = apply_seccomp(context);
1735                         if (r < 0) {
1736                                 *exit_status = EXIT_SECCOMP;
1737                                 return r;
1738                         }
1739                 }
1740 #endif
1741
1742 #ifdef HAVE_SELINUX
1743                 if (mac_selinux_use()) {
1744                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1745
1746                         if (exec_context) {
1747                                 r = setexeccon(exec_context);
1748                                 if (r < 0) {
1749                                         *exit_status = EXIT_SELINUX_CONTEXT;
1750                                         return r;
1751                                 }
1752                         }
1753                 }
1754 #endif
1755
1756 #ifdef HAVE_APPARMOR
1757                 if (context->apparmor_profile && mac_apparmor_use()) {
1758                         r = aa_change_onexec(context->apparmor_profile);
1759                         if (r < 0 && !context->apparmor_profile_ignore) {
1760                                 *exit_status = EXIT_APPARMOR_PROFILE;
1761                                 return -errno;
1762                         }
1763                 }
1764 #endif
1765         }
1766
1767         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1768         if (r < 0) {
1769                 *exit_status = EXIT_MEMORY;
1770                 return r;
1771         }
1772
1773         final_env = strv_env_merge(5,
1774                                    params->environment,
1775                                    our_env,
1776                                    context->environment,
1777                                    files_env,
1778                                    pam_env,
1779                                    NULL);
1780         if (!final_env) {
1781                 *exit_status = EXIT_MEMORY;
1782                 return -ENOMEM;
1783         }
1784
1785         final_argv = replace_env_argv(argv, final_env);
1786         if (!final_argv) {
1787                 *exit_status = EXIT_MEMORY;
1788                 return -ENOMEM;
1789         }
1790
1791         final_env = strv_env_clean(final_env);
1792
1793         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1794                 _cleanup_free_ char *line;
1795
1796                 line = exec_command_line(final_argv);
1797                 if (line) {
1798                         log_open();
1799                         log_unit_struct(params->unit_id,
1800                                         LOG_DEBUG,
1801                                         "EXECUTABLE=%s", command->path,
1802                                         LOG_MESSAGE("Executing: %s", line),
1803                                         NULL);
1804                         log_close();
1805                 }
1806         }
1807         execve(command->path, final_argv, final_env);
1808         *exit_status = EXIT_EXEC;
1809         return -errno;
1810 }
1811
1812 int exec_spawn(ExecCommand *command,
1813                const ExecContext *context,
1814                const ExecParameters *params,
1815                ExecRuntime *runtime,
1816                pid_t *ret) {
1817
1818         _cleanup_strv_free_ char **files_env = NULL;
1819         int *fds = NULL; unsigned n_fds = 0;
1820         _cleanup_free_ char *line = NULL;
1821         int socket_fd, r;
1822         char **argv;
1823         pid_t pid;
1824
1825         assert(command);
1826         assert(context);
1827         assert(ret);
1828         assert(params);
1829         assert(params->fds || params->n_fds <= 0);
1830
1831         if (context->std_input == EXEC_INPUT_SOCKET ||
1832             context->std_output == EXEC_OUTPUT_SOCKET ||
1833             context->std_error == EXEC_OUTPUT_SOCKET) {
1834
1835                 if (params->n_fds != 1) {
1836                         log_unit_error(params->unit_id, "Got more than one socket.");
1837                         return -EINVAL;
1838                 }
1839
1840                 socket_fd = params->fds[0];
1841         } else {
1842                 socket_fd = -1;
1843                 fds = params->fds;
1844                 n_fds = params->n_fds;
1845         }
1846
1847         r = exec_context_load_environment(context, params->unit_id, &files_env);
1848         if (r < 0)
1849                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1850
1851         argv = params->argv ?: command->argv;
1852         line = exec_command_line(argv);
1853         if (!line)
1854                 return log_oom();
1855
1856         log_unit_struct(params->unit_id,
1857                         LOG_DEBUG,
1858                         "EXECUTABLE=%s", command->path,
1859                         LOG_MESSAGE("About to execute: %s", line),
1860                         NULL);
1861         pid = fork();
1862         if (pid < 0)
1863                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1864
1865         if (pid == 0) {
1866                 int exit_status;
1867
1868                 r = exec_child(command,
1869                                context,
1870                                params,
1871                                runtime,
1872                                argv,
1873                                socket_fd,
1874                                fds, n_fds,
1875                                files_env,
1876                                &exit_status);
1877                 if (r < 0) {
1878                         log_open();
1879                         log_unit_struct(params->unit_id,
1880                                         LOG_ERR,
1881                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1882                                         "EXECUTABLE=%s", command->path,
1883                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1884                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1885                                                     command->path, strerror(-r)),
1886                                         LOG_ERRNO(r),
1887                                         NULL);
1888                 }
1889
1890                 _exit(exit_status);
1891         }
1892
1893         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1894
1895         /* We add the new process to the cgroup both in the child (so
1896          * that we can be sure that no user code is ever executed
1897          * outside of the cgroup) and in the parent (so that we can be
1898          * sure that when we kill the cgroup the process will be
1899          * killed too). */
1900         if (params->cgroup_path)
1901                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1902
1903         exec_status_start(&command->exec_status, pid);
1904
1905         *ret = pid;
1906         return 0;
1907 }
1908
1909 void exec_context_init(ExecContext *c) {
1910         assert(c);
1911
1912         c->umask = 0022;
1913         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1914         c->cpu_sched_policy = SCHED_OTHER;
1915         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1916         c->syslog_level_prefix = true;
1917         c->ignore_sigpipe = true;
1918         c->timer_slack_nsec = NSEC_INFINITY;
1919         c->personality = 0xffffffffUL;
1920         c->runtime_directory_mode = 0755;
1921 }
1922
1923 void exec_context_done(ExecContext *c) {
1924         unsigned l;
1925
1926         assert(c);
1927
1928         strv_free(c->environment);
1929         c->environment = NULL;
1930
1931         strv_free(c->environment_files);
1932         c->environment_files = NULL;
1933
1934         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1935                 free(c->rlimit[l]);
1936                 c->rlimit[l] = NULL;
1937         }
1938
1939         free(c->working_directory);
1940         c->working_directory = NULL;
1941         free(c->root_directory);
1942         c->root_directory = NULL;
1943
1944         free(c->tty_path);
1945         c->tty_path = NULL;
1946
1947         free(c->syslog_identifier);
1948         c->syslog_identifier = NULL;
1949
1950         free(c->user);
1951         c->user = NULL;
1952
1953         free(c->group);
1954         c->group = NULL;
1955
1956         strv_free(c->supplementary_groups);
1957         c->supplementary_groups = NULL;
1958
1959         free(c->pam_name);
1960         c->pam_name = NULL;
1961
1962         if (c->capabilities) {
1963                 cap_free(c->capabilities);
1964                 c->capabilities = NULL;
1965         }
1966
1967         strv_free(c->read_only_dirs);
1968         c->read_only_dirs = NULL;
1969
1970         strv_free(c->read_write_dirs);
1971         c->read_write_dirs = NULL;
1972
1973         strv_free(c->inaccessible_dirs);
1974         c->inaccessible_dirs = NULL;
1975
1976         if (c->cpuset)
1977                 CPU_FREE(c->cpuset);
1978
1979         free(c->utmp_id);
1980         c->utmp_id = NULL;
1981
1982         free(c->selinux_context);
1983         c->selinux_context = NULL;
1984
1985         free(c->apparmor_profile);
1986         c->apparmor_profile = NULL;
1987
1988         set_free(c->syscall_filter);
1989         c->syscall_filter = NULL;
1990
1991         set_free(c->syscall_archs);
1992         c->syscall_archs = NULL;
1993
1994         set_free(c->address_families);
1995         c->address_families = NULL;
1996
1997         strv_free(c->runtime_directory);
1998         c->runtime_directory = NULL;
1999
2000         bus_endpoint_free(c->bus_endpoint);
2001         c->bus_endpoint = NULL;
2002 }
2003
2004 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2005         char **i;
2006
2007         assert(c);
2008
2009         if (!runtime_prefix)
2010                 return 0;
2011
2012         STRV_FOREACH(i, c->runtime_directory) {
2013                 _cleanup_free_ char *p;
2014
2015                 p = strjoin(runtime_prefix, "/", *i, NULL);
2016                 if (!p)
2017                         return -ENOMEM;
2018
2019                 /* We execute this synchronously, since we need to be
2020                  * sure this is gone when we start the service
2021                  * next. */
2022                 rm_rf(p, false, true, false);
2023         }
2024
2025         return 0;
2026 }
2027
2028 void exec_command_done(ExecCommand *c) {
2029         assert(c);
2030
2031         free(c->path);
2032         c->path = NULL;
2033
2034         strv_free(c->argv);
2035         c->argv = NULL;
2036 }
2037
2038 void exec_command_done_array(ExecCommand *c, unsigned n) {
2039         unsigned i;
2040
2041         for (i = 0; i < n; i++)
2042                 exec_command_done(c+i);
2043 }
2044
2045 ExecCommand* exec_command_free_list(ExecCommand *c) {
2046         ExecCommand *i;
2047
2048         while ((i = c)) {
2049                 LIST_REMOVE(command, c, i);
2050                 exec_command_done(i);
2051                 free(i);
2052         }
2053
2054         return NULL;
2055 }
2056
2057 void exec_command_free_array(ExecCommand **c, unsigned n) {
2058         unsigned i;
2059
2060         for (i = 0; i < n; i++)
2061                 c[i] = exec_command_free_list(c[i]);
2062 }
2063
/* Context handed to invalid_env() when environment files are cleaned up */
typedef struct InvalidEnvInfo {
        /* Unit the log message is attributed to */
        const char *unit_id;

        /* Environment file the rejected assignment was read from */
        const char *path;
} InvalidEnvInfo;
2068
2069 static void invalid_env(const char *p, void *userdata) {
2070         InvalidEnvInfo *info = userdata;
2071
2072         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2073 }
2074
2075 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2076         char **i, **r = NULL;
2077
2078         assert(c);
2079         assert(l);
2080
2081         STRV_FOREACH(i, c->environment_files) {
2082                 char *fn;
2083                 int k;
2084                 bool ignore = false;
2085                 char **p;
2086                 _cleanup_globfree_ glob_t pglob = {};
2087                 int count, n;
2088
2089                 fn = *i;
2090
2091                 if (fn[0] == '-') {
2092                         ignore = true;
2093                         fn ++;
2094                 }
2095
2096                 if (!path_is_absolute(fn)) {
2097                         if (ignore)
2098                                 continue;
2099
2100                         strv_free(r);
2101                         return -EINVAL;
2102                 }
2103
2104                 /* Filename supports globbing, take all matching files */
2105                 errno = 0;
2106                 if (glob(fn, 0, NULL, &pglob) != 0) {
2107                         if (ignore)
2108                                 continue;
2109
2110                         strv_free(r);
2111                         return errno ? -errno : -EINVAL;
2112                 }
2113                 count = pglob.gl_pathc;
2114                 if (count == 0) {
2115                         if (ignore)
2116                                 continue;
2117
2118                         strv_free(r);
2119                         return -EINVAL;
2120                 }
2121                 for (n = 0; n < count; n++) {
2122                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2123                         if (k < 0) {
2124                                 if (ignore)
2125                                         continue;
2126
2127                                 strv_free(r);
2128                                 return k;
2129                         }
2130                         /* Log invalid environment variables with filename */
2131                         if (p) {
2132                                 InvalidEnvInfo info = {
2133                                         .unit_id = unit_id,
2134                                         .path = pglob.gl_pathv[n]
2135                                 };
2136
2137                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
2138                         }
2139
2140                         if (r == NULL)
2141                                 r = p;
2142                         else {
2143                                 char **m;
2144
2145                                 m = strv_env_merge(2, r, p);
2146                                 strv_free(r);
2147                                 strv_free(p);
2148                                 if (!m)
2149                                         return -ENOMEM;
2150
2151                                 r = m;
2152                         }
2153                 }
2154         }
2155
2156         *l = r;
2157
2158         return 0;
2159 }
2160
2161 static bool tty_may_match_dev_console(const char *tty) {
2162         _cleanup_free_ char *active = NULL;
2163        char *console;
2164
2165         if (startswith(tty, "/dev/"))
2166                 tty += 5;
2167
2168         /* trivial identity? */
2169         if (streq(tty, "console"))
2170                 return true;
2171
2172         console = resolve_dev_console(&active);
2173         /* if we could not resolve, assume it may */
2174         if (!console)
2175                 return true;
2176
2177         /* "tty0" means the active VC, so it may be the same sometimes */
2178         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2179 }
2180
2181 bool exec_context_may_touch_console(ExecContext *ec) {
2182         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2183                 is_terminal_input(ec->std_input) ||
2184                 is_terminal_output(ec->std_output) ||
2185                 is_terminal_output(ec->std_error)) &&
2186                tty_may_match_dev_console(tty_path(ec));
2187 }
2188
2189 static void strv_fprintf(FILE *f, char **l) {
2190         char **g;
2191
2192         assert(f);
2193
2194         STRV_FOREACH(g, l)
2195                 fprintf(f, " %s", *g);
2196 }
2197
2198 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2199         char **e;
2200         unsigned i;
2201
2202         assert(c);
2203         assert(f);
2204
2205         prefix = strempty(prefix);
2206
2207         fprintf(f,
2208                 "%sUMask: %04o\n"
2209                 "%sWorkingDirectory: %s\n"
2210                 "%sRootDirectory: %s\n"
2211                 "%sNonBlocking: %s\n"
2212                 "%sPrivateTmp: %s\n"
2213                 "%sPrivateNetwork: %s\n"
2214                 "%sPrivateDevices: %s\n"
2215                 "%sProtectHome: %s\n"
2216                 "%sProtectSystem: %s\n"
2217                 "%sIgnoreSIGPIPE: %s\n",
2218                 prefix, c->umask,
2219                 prefix, c->working_directory ? c->working_directory : "/",
2220                 prefix, c->root_directory ? c->root_directory : "/",
2221                 prefix, yes_no(c->non_blocking),
2222                 prefix, yes_no(c->private_tmp),
2223                 prefix, yes_no(c->private_network),
2224                 prefix, yes_no(c->private_devices),
2225                 prefix, protect_home_to_string(c->protect_home),
2226                 prefix, protect_system_to_string(c->protect_system),
2227                 prefix, yes_no(c->ignore_sigpipe));
2228
2229         STRV_FOREACH(e, c->environment)
2230                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2231
2232         STRV_FOREACH(e, c->environment_files)
2233                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2234
2235         if (c->nice_set)
2236                 fprintf(f,
2237                         "%sNice: %i\n",
2238                         prefix, c->nice);
2239
2240         if (c->oom_score_adjust_set)
2241                 fprintf(f,
2242                         "%sOOMScoreAdjust: %i\n",
2243                         prefix, c->oom_score_adjust);
2244
2245         for (i = 0; i < RLIM_NLIMITS; i++)
2246                 if (c->rlimit[i])
2247                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2248                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2249
2250         if (c->ioprio_set) {
2251                 _cleanup_free_ char *class_str = NULL;
2252
2253                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2254                 fprintf(f,
2255                         "%sIOSchedulingClass: %s\n"
2256                         "%sIOPriority: %i\n",
2257                         prefix, strna(class_str),
2258                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2259         }
2260
2261         if (c->cpu_sched_set) {
2262                 _cleanup_free_ char *policy_str = NULL;
2263
2264                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2265                 fprintf(f,
2266                         "%sCPUSchedulingPolicy: %s\n"
2267                         "%sCPUSchedulingPriority: %i\n"
2268                         "%sCPUSchedulingResetOnFork: %s\n",
2269                         prefix, strna(policy_str),
2270                         prefix, c->cpu_sched_priority,
2271                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2272         }
2273
2274         if (c->cpuset) {
2275                 fprintf(f, "%sCPUAffinity:", prefix);
2276                 for (i = 0; i < c->cpuset_ncpus; i++)
2277                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2278                                 fprintf(f, " %u", i);
2279                 fputs("\n", f);
2280         }
2281
2282         if (c->timer_slack_nsec != NSEC_INFINITY)
2283                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2284
2285         fprintf(f,
2286                 "%sStandardInput: %s\n"
2287                 "%sStandardOutput: %s\n"
2288                 "%sStandardError: %s\n",
2289                 prefix, exec_input_to_string(c->std_input),
2290                 prefix, exec_output_to_string(c->std_output),
2291                 prefix, exec_output_to_string(c->std_error));
2292
2293         if (c->tty_path)
2294                 fprintf(f,
2295                         "%sTTYPath: %s\n"
2296                         "%sTTYReset: %s\n"
2297                         "%sTTYVHangup: %s\n"
2298                         "%sTTYVTDisallocate: %s\n",
2299                         prefix, c->tty_path,
2300                         prefix, yes_no(c->tty_reset),
2301                         prefix, yes_no(c->tty_vhangup),
2302                         prefix, yes_no(c->tty_vt_disallocate));
2303
2304         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2305             c->std_output == EXEC_OUTPUT_KMSG ||
2306             c->std_output == EXEC_OUTPUT_JOURNAL ||
2307             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2308             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2309             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2310             c->std_error == EXEC_OUTPUT_SYSLOG ||
2311             c->std_error == EXEC_OUTPUT_KMSG ||
2312             c->std_error == EXEC_OUTPUT_JOURNAL ||
2313             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2314             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2315             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2316
2317                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2318
2319                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2320                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2321
2322                 fprintf(f,
2323                         "%sSyslogFacility: %s\n"
2324                         "%sSyslogLevel: %s\n",
2325                         prefix, strna(fac_str),
2326                         prefix, strna(lvl_str));
2327         }
2328
2329         if (c->capabilities) {
2330                 _cleanup_cap_free_charp_ char *t;
2331
2332                 t = cap_to_text(c->capabilities, NULL);
2333                 if (t)
2334                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2335         }
2336
2337         if (c->secure_bits)
2338                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2339                         prefix,
2340                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2341                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2342                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2343                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2344                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2345                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2346
2347         if (c->capability_bounding_set_drop) {
2348                 unsigned long l;
2349                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2350
2351                 for (l = 0; l <= cap_last_cap(); l++)
2352                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2353                                 fprintf(f, " %s", strna(capability_to_name(l)));
2354
2355                 fputs("\n", f);
2356         }
2357
2358         if (c->user)
2359                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2360         if (c->group)
2361                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2362
2363         if (strv_length(c->supplementary_groups) > 0) {
2364                 fprintf(f, "%sSupplementaryGroups:", prefix);
2365                 strv_fprintf(f, c->supplementary_groups);
2366                 fputs("\n", f);
2367         }
2368
2369         if (c->pam_name)
2370                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2371
2372         if (strv_length(c->read_write_dirs) > 0) {
2373                 fprintf(f, "%sReadWriteDirs:", prefix);
2374                 strv_fprintf(f, c->read_write_dirs);
2375                 fputs("\n", f);
2376         }
2377
2378         if (strv_length(c->read_only_dirs) > 0) {
2379                 fprintf(f, "%sReadOnlyDirs:", prefix);
2380                 strv_fprintf(f, c->read_only_dirs);
2381                 fputs("\n", f);
2382         }
2383
2384         if (strv_length(c->inaccessible_dirs) > 0) {
2385                 fprintf(f, "%sInaccessibleDirs:", prefix);
2386                 strv_fprintf(f, c->inaccessible_dirs);
2387                 fputs("\n", f);
2388         }
2389
2390         if (c->utmp_id)
2391                 fprintf(f,
2392                         "%sUtmpIdentifier: %s\n",
2393                         prefix, c->utmp_id);
2394
2395         if (c->selinux_context)
2396                 fprintf(f,
2397                         "%sSELinuxContext: %s%s\n",
2398                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2399
2400         if (c->personality != 0xffffffffUL)
2401                 fprintf(f,
2402                         "%sPersonality: %s\n",
2403                         prefix, strna(personality_to_string(c->personality)));
2404
2405         if (c->syscall_filter) {
2406 #ifdef HAVE_SECCOMP
2407                 Iterator j;
2408                 void *id;
2409                 bool first = true;
2410 #endif
2411
2412                 fprintf(f,
2413                         "%sSystemCallFilter: ",
2414                         prefix);
2415
2416                 if (!c->syscall_whitelist)
2417                         fputc('~', f);
2418
2419 #ifdef HAVE_SECCOMP
2420                 SET_FOREACH(id, c->syscall_filter, j) {
2421                         _cleanup_free_ char *name = NULL;
2422
2423                         if (first)
2424                                 first = false;
2425                         else
2426                                 fputc(' ', f);
2427
2428                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2429                         fputs(strna(name), f);
2430                 }
2431 #endif
2432
2433                 fputc('\n', f);
2434         }
2435
2436         if (c->syscall_archs) {
2437 #ifdef HAVE_SECCOMP
2438                 Iterator j;
2439                 void *id;
2440 #endif
2441
2442                 fprintf(f,
2443                         "%sSystemCallArchitectures:",
2444                         prefix);
2445
2446 #ifdef HAVE_SECCOMP
2447                 SET_FOREACH(id, c->syscall_archs, j)
2448                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2449 #endif
2450                 fputc('\n', f);
2451         }
2452
2453         if (c->syscall_errno != 0)
2454                 fprintf(f,
2455                         "%sSystemCallErrorNumber: %s\n",
2456                         prefix, strna(errno_to_name(c->syscall_errno)));
2457
2458         if (c->apparmor_profile)
2459                 fprintf(f,
2460                         "%sAppArmorProfile: %s%s\n",
2461                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2462 }
2463
2464 bool exec_context_maintains_privileges(ExecContext *c) {
2465         assert(c);
2466
2467         /* Returns true if the process forked off would run run under
2468          * an unchanged UID or as root. */
2469
2470         if (!c->user)
2471                 return true;
2472
2473         if (streq(c->user, "root") || streq(c->user, "0"))
2474                 return true;
2475
2476         return false;
2477 }
2478
2479 void exec_status_start(ExecStatus *s, pid_t pid) {
2480         assert(s);
2481
2482         zero(*s);
2483         s->pid = pid;
2484         dual_timestamp_get(&s->start_timestamp);
2485 }
2486
2487 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2488         assert(s);
2489
2490         if (s->pid && s->pid != pid)
2491                 zero(*s);
2492
2493         s->pid = pid;
2494         dual_timestamp_get(&s->exit_timestamp);
2495
2496         s->code = code;
2497         s->status = status;
2498
2499         if (context) {
2500                 if (context->utmp_id)
2501                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2502
2503                 exec_context_tty_reset(context);
2504         }
2505 }
2506
2507 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2508         char buf[FORMAT_TIMESTAMP_MAX];
2509
2510         assert(s);
2511         assert(f);
2512
2513         if (s->pid <= 0)
2514                 return;
2515
2516         prefix = strempty(prefix);
2517
2518         fprintf(f,
2519                 "%sPID: "PID_FMT"\n",
2520                 prefix, s->pid);
2521
2522         if (s->start_timestamp.realtime > 0)
2523                 fprintf(f,
2524                         "%sStart Timestamp: %s\n",
2525                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2526
2527         if (s->exit_timestamp.realtime > 0)
2528                 fprintf(f,
2529                         "%sExit Timestamp: %s\n"
2530                         "%sExit Code: %s\n"
2531                         "%sExit Status: %i\n",
2532                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2533                         prefix, sigchld_code_to_string(s->code),
2534                         prefix, s->status);
2535 }
2536
2537 char *exec_command_line(char **argv) {
2538         size_t k;
2539         char *n, *p, **a;
2540         bool first = true;
2541
2542         assert(argv);
2543
2544         k = 1;
2545         STRV_FOREACH(a, argv)
2546                 k += strlen(*a)+3;
2547
2548         if (!(n = new(char, k)))
2549                 return NULL;
2550
2551         p = n;
2552         STRV_FOREACH(a, argv) {
2553
2554                 if (!first)
2555                         *(p++) = ' ';
2556                 else
2557                         first = false;
2558
2559                 if (strpbrk(*a, WHITESPACE)) {
2560                         *(p++) = '\'';
2561                         p = stpcpy(p, *a);
2562                         *(p++) = '\'';
2563                 } else
2564                         p = stpcpy(p, *a);
2565
2566         }
2567
2568         *p = 0;
2569
2570         /* FIXME: this doesn't really handle arguments that have
2571          * spaces and ticks in them */
2572
2573         return n;
2574 }
2575
2576 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2577         _cleanup_free_ char *cmd = NULL;
2578         const char *prefix2;
2579
2580         assert(c);
2581         assert(f);
2582
2583         prefix = strempty(prefix);
2584         prefix2 = strjoina(prefix, "\t");
2585
2586         cmd = exec_command_line(c->argv);
2587         fprintf(f,
2588                 "%sCommand Line: %s\n",
2589                 prefix, cmd ? cmd : strerror(ENOMEM));
2590
2591         exec_status_dump(&c->exec_status, f, prefix2);
2592 }
2593
2594 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2595         assert(f);
2596
2597         prefix = strempty(prefix);
2598
2599         LIST_FOREACH(command, c, c)
2600                 exec_command_dump(c, f, prefix);
2601 }
2602
2603 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2604         ExecCommand *end;
2605
2606         assert(l);
2607         assert(e);
2608
2609         if (*l) {
2610                 /* It's kind of important, that we keep the order here */
2611                 LIST_FIND_TAIL(command, *l, end);
2612                 LIST_INSERT_AFTER(command, *l, end, e);
2613         } else
2614               *l = e;
2615 }
2616
2617 int exec_command_set(ExecCommand *c, const char *path, ...) {
2618         va_list ap;
2619         char **l, *p;
2620
2621         assert(c);
2622         assert(path);
2623
2624         va_start(ap, path);
2625         l = strv_new_ap(path, ap);
2626         va_end(ap);
2627
2628         if (!l)
2629                 return -ENOMEM;
2630
2631         p = strdup(path);
2632         if (!p) {
2633                 strv_free(l);
2634                 return -ENOMEM;
2635         }
2636
2637         free(c->path);
2638         c->path = p;
2639
2640         strv_free(c->argv);
2641         c->argv = l;
2642
2643         return 0;
2644 }
2645
2646 int exec_command_append(ExecCommand *c, const char *path, ...) {
2647         _cleanup_strv_free_ char **l = NULL;
2648         va_list ap;
2649         int r;
2650
2651         assert(c);
2652         assert(path);
2653
2654         va_start(ap, path);
2655         l = strv_new_ap(path, ap);
2656         va_end(ap);
2657
2658         if (!l)
2659                 return -ENOMEM;
2660
2661         r = strv_extend_strv(&c->argv, l);
2662         if (r < 0)
2663                 return r;
2664
2665         return 0;
2666 }
2667
2668
2669 static int exec_runtime_allocate(ExecRuntime **rt) {
2670
2671         if (*rt)
2672                 return 0;
2673
2674         *rt = new0(ExecRuntime, 1);
2675         if (!*rt)
2676                 return -ENOMEM;
2677
2678         (*rt)->n_ref = 1;
2679         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2680
2681         return 0;
2682 }
2683
2684 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2685         int r;
2686
2687         assert(rt);
2688         assert(c);
2689         assert(id);
2690
2691         if (*rt)
2692                 return 1;
2693
2694         if (!c->private_network && !c->private_tmp)
2695                 return 0;
2696
2697         r = exec_runtime_allocate(rt);
2698         if (r < 0)
2699                 return r;
2700
2701         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2702                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2703                         return -errno;
2704         }
2705
2706         if (c->private_tmp && !(*rt)->tmp_dir) {
2707                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2708                 if (r < 0)
2709                         return r;
2710         }
2711
2712         return 1;
2713 }
2714
2715 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2716         assert(r);
2717         assert(r->n_ref > 0);
2718
2719         r->n_ref++;
2720         return r;
2721 }
2722
2723 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2724
2725         if (!r)
2726                 return NULL;
2727
2728         assert(r->n_ref > 0);
2729
2730         r->n_ref--;
2731         if (r->n_ref <= 0) {
2732                 free(r->tmp_dir);
2733                 free(r->var_tmp_dir);
2734                 safe_close_pair(r->netns_storage_socket);
2735                 free(r);
2736         }
2737
2738         return NULL;
2739 }
2740
2741 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2742         assert(u);
2743         assert(f);
2744         assert(fds);
2745
2746         if (!rt)
2747                 return 0;
2748
2749         if (rt->tmp_dir)
2750                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2751
2752         if (rt->var_tmp_dir)
2753                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2754
2755         if (rt->netns_storage_socket[0] >= 0) {
2756                 int copy;
2757
2758                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2759                 if (copy < 0)
2760                         return copy;
2761
2762                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2763         }
2764
2765         if (rt->netns_storage_socket[1] >= 0) {
2766                 int copy;
2767
2768                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2769                 if (copy < 0)
2770                         return copy;
2771
2772                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2773         }
2774
2775         return 0;
2776 }
2777
2778 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2779         int r;
2780
2781         assert(rt);
2782         assert(key);
2783         assert(value);
2784
2785         if (streq(key, "tmp-dir")) {
2786                 char *copy;
2787
2788                 r = exec_runtime_allocate(rt);
2789                 if (r < 0)
2790                         return r;
2791
2792                 copy = strdup(value);
2793                 if (!copy)
2794                         return log_oom();
2795
2796                 free((*rt)->tmp_dir);
2797                 (*rt)->tmp_dir = copy;
2798
2799         } else if (streq(key, "var-tmp-dir")) {
2800                 char *copy;
2801
2802                 r = exec_runtime_allocate(rt);
2803                 if (r < 0)
2804                         return r;
2805
2806                 copy = strdup(value);
2807                 if (!copy)
2808                         return log_oom();
2809
2810                 free((*rt)->var_tmp_dir);
2811                 (*rt)->var_tmp_dir = copy;
2812
2813         } else if (streq(key, "netns-socket-0")) {
2814                 int fd;
2815
2816                 r = exec_runtime_allocate(rt);
2817                 if (r < 0)
2818                         return r;
2819
2820                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2821                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2822                 else {
2823                         safe_close((*rt)->netns_storage_socket[0]);
2824                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2825                 }
2826         } else if (streq(key, "netns-socket-1")) {
2827                 int fd;
2828
2829                 r = exec_runtime_allocate(rt);
2830                 if (r < 0)
2831                         return r;
2832
2833                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2834                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2835                 else {
2836                         safe_close((*rt)->netns_storage_socket[1]);
2837                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2838                 }
2839         } else
2840                 return 0;
2841
2842         return 1;
2843 }
2844
/* Thread entry point: passes the path 'p' to rm_rf_dangerous() and then
 * frees the path string. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2851
2852 void exec_runtime_destroy(ExecRuntime *rt) {
2853         int r;
2854
2855         if (!rt)
2856                 return;
2857
2858         /* If there are multiple users of this, let's leave the stuff around */
2859         if (rt->n_ref > 1)
2860                 return;
2861
2862         if (rt->tmp_dir) {
2863                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2864
2865                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2866                 if (r < 0) {
2867                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2868                         free(rt->tmp_dir);
2869                 }
2870
2871                 rt->tmp_dir = NULL;
2872         }
2873
2874         if (rt->var_tmp_dir) {
2875                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2876
2877                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2878                 if (r < 0) {
2879                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2880                         free(rt->var_tmp_dir);
2881                 }
2882
2883                 rt->var_tmp_dir = NULL;
2884         }
2885
2886         safe_close_pair(rt->netns_storage_socket);
2887 }
2888
2889 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2890         [EXEC_INPUT_NULL] = "null",
2891         [EXEC_INPUT_TTY] = "tty",
2892         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2893         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2894         [EXEC_INPUT_SOCKET] = "socket"
2895 };
2896
2897 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2898
2899 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2900         [EXEC_OUTPUT_INHERIT] = "inherit",
2901         [EXEC_OUTPUT_NULL] = "null",
2902         [EXEC_OUTPUT_TTY] = "tty",
2903         [EXEC_OUTPUT_SYSLOG] = "syslog",
2904         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2905         [EXEC_OUTPUT_KMSG] = "kmsg",
2906         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2907         [EXEC_OUTPUT_JOURNAL] = "journal",
2908         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2909         [EXEC_OUTPUT_SOCKET] = "socket"
2910 };
2911
2912 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);