chiark / gitweb /
core: don't fail to run services in --user instances if $HOME is missing
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #ifdef HAVE_SELINUX
49 #include <selinux/selinux.h>
50 #endif
51
52 #ifdef HAVE_SECCOMP
53 #include <seccomp.h>
54 #endif
55
56 #ifdef HAVE_APPARMOR
57 #include <sys/apparmor.h>
58 #endif
59
60 #include "execute.h"
61 #include "strv.h"
62 #include "macro.h"
63 #include "capability.h"
64 #include "util.h"
65 #include "log.h"
66 #include "sd-messages.h"
67 #include "ioprio.h"
68 #include "securebits.h"
69 #include "namespace.h"
70 #include "exit-status.h"
71 #include "missing.h"
72 #include "utmp-wtmp.h"
73 #include "def.h"
74 #include "path-util.h"
75 #include "env-util.h"
76 #include "fileio.h"
77 #include "unit.h"
78 #include "async.h"
79 #include "selinux-util.h"
80 #include "errno-list.h"
81 #include "af-list.h"
82 #include "mkdir.h"
83 #include "apparmor-util.h"
84 #include "smack-util.h"
85 #include "bus-endpoint.h"
86 #include "label.h"
87 #include "cap-list.h"
88
89 #ifdef HAVE_SECCOMP
90 #include "seccomp-util.h"
91 #endif
92
/* Idle timeouts, expressed in microseconds: 5 seconds and 1 second */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() sets on, and then verifies for, a terminal.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size in bytes (8 MiB) requested for the journal stream socket
 * in connect_logger_as() */
#define SNDBUF_SIZE (8*1024*1024)
100
/* Rearranges the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. the set becomes contiguous right after
 * stdin/stdout/stderr.
 *
 * The array is modified in place: each entry is replaced by the number of
 * its duplicate and the previous descriptor is closed.
 *
 * Returns 0 on success, or -errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int target = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free number >= target.
                         * If the slot was still occupied we got something
                         * else; note the first such index so that another
                         * pass starts from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
144
/* Applies the requested O_NONBLOCK state to each of the n_fds descriptors
 * and clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child processes */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
171
172 _pure_ static const char *tty_path(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_path)
176                 return context->tty_path;
177
178         return "/dev/console";
179 }
180
181 static void exec_context_tty_reset(const ExecContext *context) {
182         assert(context);
183
184         if (context->tty_vhangup)
185                 terminal_vhangup(tty_path(context));
186
187         if (context->tty_reset)
188                 reset_terminal(tty_path(context));
189
190         if (context->tty_vt_disallocate && context->tty_path)
191                 vt_disallocate(context->tty_path);
192 }
193
194 static bool is_terminal_output(ExecOutput o) {
195         return
196                 o == EXEC_OUTPUT_TTY ||
197                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 }
201
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd.
 *
 * Returns nfd on success, -errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed out the wanted number */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
219
220 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225         uid_t olduid = UID_INVALID;
226         gid_t oldgid = GID_INVALID;
227         int r;
228
229         if (gid != GID_INVALID) {
230                 oldgid = getgid();
231
232                 r = setegid(gid);
233                 if (r < 0)
234                         return -errno;
235         }
236
237         if (uid != UID_INVALID) {
238                 olduid = getuid();
239
240                 r = seteuid(uid);
241                 if (r < 0) {
242                         r = -errno;
243                         goto restore_gid;
244                 }
245         }
246
247         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
248         if (r < 0)
249                 r = -errno;
250
251         /* If we fail to restore the uid or gid, things will likely
252            fail later on. This should only happen if an LSM interferes. */
253
254         if (uid != UID_INVALID)
255                 (void) seteuid(olduid);
256
257  restore_gid:
258         if (gid != GID_INVALID)
259                 (void) setegid(oldgid);
260
261         return r;
262 }
263
264 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
265         int fd, r;
266
267         assert(context);
268         assert(output < _EXEC_OUTPUT_MAX);
269         assert(ident);
270         assert(nfd >= 0);
271
272         fd = socket(AF_UNIX, SOCK_STREAM, 0);
273         if (fd < 0)
274                 return -errno;
275
276         r = connect_journal_socket(fd, uid, gid);
277         if (r < 0)
278                 return r;
279
280         if (shutdown(fd, SHUT_RD) < 0) {
281                 safe_close(fd);
282                 return -errno;
283         }
284
285         fd_inc_sndbuf(fd, SNDBUF_SIZE);
286
287         dprintf(fd,
288                 "%s\n"
289                 "%s\n"
290                 "%i\n"
291                 "%i\n"
292                 "%i\n"
293                 "%i\n"
294                 "%i\n",
295                 context->syslog_identifier ? context->syslog_identifier : ident,
296                 unit_id,
297                 context->syslog_priority,
298                 !!context->syslog_level_prefix,
299                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
300                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
301                 is_terminal_output(output));
302
303         if (fd != nfd) {
304                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
305                 safe_close(fd);
306         } else
307                 r = nfd;
308
309         return r;
310 }
/* Opens the terminal at path (O_NOCTTY is always added to mode) and makes
 * it available as file descriptor nfd.
 *
 * Returns nfd on success, otherwise the negative value from open_terminal()
 * or -errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        ret = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        safe_close(term_fd);

        return ret;
}
328
329 static bool is_terminal_input(ExecInput i) {
330         return
331                 i == EXEC_INPUT_TTY ||
332                 i == EXEC_INPUT_TTY_FORCE ||
333                 i == EXEC_INPUT_TTY_FAIL;
334 }
335
336 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
337
338         if (is_terminal_input(std_input) && !apply_tty_stdin)
339                 return EXEC_INPUT_NULL;
340
341         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
342                 return EXEC_INPUT_NULL;
343
344         return std_input;
345 }
346
347 static int fixup_output(ExecOutput std_output, int socket_fd) {
348
349         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
350                 return EXEC_OUTPUT_INHERIT;
351
352         return std_output;
353 }
354
355 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
356         ExecInput i;
357
358         assert(context);
359
360         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
361
362         switch (i) {
363
364         case EXEC_INPUT_NULL:
365                 return open_null_as(O_RDONLY, STDIN_FILENO);
366
367         case EXEC_INPUT_TTY:
368         case EXEC_INPUT_TTY_FORCE:
369         case EXEC_INPUT_TTY_FAIL: {
370                 int fd, r;
371
372                 fd = acquire_terminal(tty_path(context),
373                                       i == EXEC_INPUT_TTY_FAIL,
374                                       i == EXEC_INPUT_TTY_FORCE,
375                                       false,
376                                       USEC_INFINITY);
377                 if (fd < 0)
378                         return fd;
379
380                 if (fd != STDIN_FILENO) {
381                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
382                         safe_close(fd);
383                 } else
384                         r = STDIN_FILENO;
385
386                 return r;
387         }
388
389         case EXEC_INPUT_SOCKET:
390                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
391
392         default:
393                 assert_not_reached("Unknown input type");
394         }
395 }
396
397 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
398         ExecOutput o;
399         ExecInput i;
400         int r;
401
402         assert(context);
403         assert(ident);
404
405         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
406         o = fixup_output(context->std_output, socket_fd);
407
408         if (fileno == STDERR_FILENO) {
409                 ExecOutput e;
410                 e = fixup_output(context->std_error, socket_fd);
411
412                 /* This expects the input and output are already set up */
413
414                 /* Don't change the stderr file descriptor if we inherit all
415                  * the way and are not on a tty */
416                 if (e == EXEC_OUTPUT_INHERIT &&
417                     o == EXEC_OUTPUT_INHERIT &&
418                     i == EXEC_INPUT_NULL &&
419                     !is_terminal_input(context->std_input) &&
420                     getppid () != 1)
421                         return fileno;
422
423                 /* Duplicate from stdout if possible */
424                 if (e == o || e == EXEC_OUTPUT_INHERIT)
425                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
426
427                 o = e;
428
429         } else if (o == EXEC_OUTPUT_INHERIT) {
430                 /* If input got downgraded, inherit the original value */
431                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
432                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
433
434                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
435                 if (i != EXEC_INPUT_NULL)
436                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
437
438                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
439                 if (getppid() != 1)
440                         return fileno;
441
442                 /* We need to open /dev/null here anew, to get the right access mode. */
443                 return open_null_as(O_WRONLY, fileno);
444         }
445
446         switch (o) {
447
448         case EXEC_OUTPUT_NULL:
449                 return open_null_as(O_WRONLY, fileno);
450
451         case EXEC_OUTPUT_TTY:
452                 if (is_terminal_input(i))
453                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
454
455                 /* We don't reset the terminal if this is just about output */
456                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
457
458         case EXEC_OUTPUT_SYSLOG:
459         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
460         case EXEC_OUTPUT_KMSG:
461         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
462         case EXEC_OUTPUT_JOURNAL:
463         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
464                 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
465                 if (r < 0) {
466                         log_unit_struct(unit_id,
467                                         LOG_ERR,
468                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
469                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
470                                                     unit_id, strerror(-r)),
471                                         LOG_ERRNO(-r),
472                                         NULL);
473                         r = open_null_as(O_WRONLY, fileno);
474                 }
475                 return r;
476
477         case EXEC_OUTPUT_SOCKET:
478                 assert(socket_fd >= 0);
479                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
480
481         default:
482                 assert_not_reached("Unknown error type");
483         }
484 }
485
486 static int chown_terminal(int fd, uid_t uid) {
487         struct stat st;
488
489         assert(fd >= 0);
490
491         /* This might fail. What matters are the results. */
492         (void) fchown(fd, uid, -1);
493         (void) fchmod(fd, TTY_MODE);
494
495         if (fstat(fd, &st) < 0)
496                 return -errno;
497
498         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
499                 return -EPERM;
500
501         return 0;
502 }
503
504 static int setup_confirm_stdio(int *_saved_stdin,
505                                int *_saved_stdout) {
506         int fd = -1, saved_stdin, saved_stdout = -1, r;
507
508         assert(_saved_stdin);
509         assert(_saved_stdout);
510
511         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
512         if (saved_stdin < 0)
513                 return -errno;
514
515         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516         if (saved_stdout < 0) {
517                 r = errno;
518                 goto fail;
519         }
520
521         fd = acquire_terminal(
522                         "/dev/console",
523                         false,
524                         false,
525                         false,
526                         DEFAULT_CONFIRM_USEC);
527         if (fd < 0) {
528                 r = fd;
529                 goto fail;
530         }
531
532         r = chown_terminal(fd, getuid());
533         if (r < 0)
534                 goto fail;
535
536         if (dup2(fd, STDIN_FILENO) < 0) {
537                 r = -errno;
538                 goto fail;
539         }
540
541         if (dup2(fd, STDOUT_FILENO) < 0) {
542                 r = -errno;
543                 goto fail;
544         }
545
546         if (fd >= 2)
547                 safe_close(fd);
548
549         *_saved_stdin = saved_stdin;
550         *_saved_stdout = saved_stdout;
551
552         return 0;
553
554 fail:
555         safe_close(saved_stdout);
556         safe_close(saved_stdin);
557         safe_close(fd);
558
559         return r;
560 }
561
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563         _cleanup_close_ int fd = -1;
564         va_list ap;
565
566         assert(format);
567
568         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
569         if (fd < 0)
570                 return fd;
571
572         va_start(ap, format);
573         vdprintf(fd, format, ap);
574         va_end(ap);
575
576         return 0;
577 }
578
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies.
 *
 * Returns 0 on success, or -errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
602
603 static int ask_for_confirmation(char *response, char **argv) {
604         int saved_stdout = -1, saved_stdin = -1, r;
605         _cleanup_free_ char *line = NULL;
606
607         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
608         if (r < 0)
609                 return r;
610
611         line = exec_command_line(argv);
612         if (!line)
613                 return -ENOMEM;
614
615         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
616
617         restore_confirm_stdio(&saved_stdin, &saved_stdout);
618
619         return r;
620 }
621
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623         bool keep_groups = false;
624         int r;
625
626         assert(context);
627
628         /* Lookup and set GID and supplementary group list. Here too
629          * we avoid NSS lookups for gid=0. */
630
631         if (context->group || username) {
632
633                 if (context->group) {
634                         const char *g = context->group;
635
636                         if ((r = get_group_creds(&g, &gid)) < 0)
637                                 return r;
638                 }
639
640                 /* First step, initialize groups from /etc/groups */
641                 if (username && gid != 0) {
642                         if (initgroups(username, gid) < 0)
643                                 return -errno;
644
645                         keep_groups = true;
646                 }
647
648                 /* Second step, set our gids */
649                 if (setresgid(gid, gid, gid) < 0)
650                         return -errno;
651         }
652
653         if (context->supplementary_groups) {
654                 int ngroups_max, k;
655                 gid_t *gids;
656                 char **i;
657
658                 /* Final step, initialize any manually set supplementary groups */
659                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
660
661                 if (!(gids = new(gid_t, ngroups_max)))
662                         return -ENOMEM;
663
664                 if (keep_groups) {
665                         if ((k = getgroups(ngroups_max, gids)) < 0) {
666                                 free(gids);
667                                 return -errno;
668                         }
669                 } else
670                         k = 0;
671
672                 STRV_FOREACH(i, context->supplementary_groups) {
673                         const char *g;
674
675                         if (k >= ngroups_max) {
676                                 free(gids);
677                                 return -E2BIG;
678                         }
679
680                         g = *i;
681                         r = get_group_creds(&g, gids+k);
682                         if (r < 0) {
683                                 free(gids);
684                                 return r;
685                         }
686
687                         k++;
688                 }
689
690                 if (setgroups(k, gids) < 0) {
691                         free(gids);
692                         return -errno;
693                 }
694
695                 free(gids);
696         }
697
698         return 0;
699 }
700
701 static int enforce_user(const ExecContext *context, uid_t uid) {
702         assert(context);
703
704         /* Sets (but doesn't lookup) the uid and make sure we keep the
705          * capabilities while doing so. */
706
707         if (context->capabilities) {
708                 _cleanup_cap_free_ cap_t d = NULL;
709                 static const cap_value_t bits[] = {
710                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
711                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
712                 };
713
714                 /* First step: If we need to keep capabilities but
715                  * drop privileges we need to make sure we keep our
716                  * caps, while we drop privileges. */
717                 if (uid != 0) {
718                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
719
720                         if (prctl(PR_GET_SECUREBITS) != sb)
721                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
722                                         return -errno;
723                 }
724
725                 /* Second step: set the capabilities. This will reduce
726                  * the capabilities to the minimum we need. */
727
728                 d = cap_dup(context->capabilities);
729                 if (!d)
730                         return -errno;
731
732                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
733                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
734                         return -errno;
735
736                 if (cap_set_proc(d) < 0)
737                         return -errno;
738         }
739
740         /* Third step: actually set the uids */
741         if (setresuid(uid, uid, uid) < 0)
742                 return -errno;
743
744         /* At this point we should have all necessary capabilities but
745            are otherwise a normal user. However, the caps might got
746            corrupted due to the setresuid() so we need clean them up
747            later. This is done outside of this call. */
748
749         return 0;
750 }
751
752 #ifdef HAVE_PAM
753
754 static int null_conv(
755                 int num_msg,
756                 const struct pam_message **msg,
757                 struct pam_response **resp,
758                 void *appdata_ptr) {
759
760         /* We don't support conversations */
761
762         return PAM_CONV_ERR;
763 }
764
765 static int setup_pam(
766                 const char *name,
767                 const char *user,
768                 uid_t uid,
769                 const char *tty,
770                 char ***pam_env,
771                 int fds[], unsigned n_fds) {
772
773         static const struct pam_conv conv = {
774                 .conv = null_conv,
775                 .appdata_ptr = NULL
776         };
777
778         pam_handle_t *handle = NULL;
779         sigset_t ss, old_ss;
780         int pam_code = PAM_SUCCESS;
781         int err;
782         char **e = NULL;
783         bool close_session = false;
784         pid_t pam_pid = 0, parent_pid;
785         int flags = 0;
786
787         assert(name);
788         assert(user);
789         assert(pam_env);
790
791         /* We set up PAM in the parent process, then fork. The child
792          * will then stay around until killed via PR_GET_PDEATHSIG or
793          * systemd via the cgroup logic. It will then remove the PAM
794          * session again. The parent process will exec() the actual
795          * daemon. We do things this way to ensure that the main PID
796          * of the daemon is the one we initially fork()ed. */
797
798         if (log_get_max_level() < LOG_DEBUG)
799                 flags |= PAM_SILENT;
800
801         pam_code = pam_start(name, user, &conv, &handle);
802         if (pam_code != PAM_SUCCESS) {
803                 handle = NULL;
804                 goto fail;
805         }
806
807         if (tty) {
808                 pam_code = pam_set_item(handle, PAM_TTY, tty);
809                 if (pam_code != PAM_SUCCESS)
810                         goto fail;
811         }
812
813         pam_code = pam_acct_mgmt(handle, flags);
814         if (pam_code != PAM_SUCCESS)
815                 goto fail;
816
817         pam_code = pam_open_session(handle, flags);
818         if (pam_code != PAM_SUCCESS)
819                 goto fail;
820
821         close_session = true;
822
823         e = pam_getenvlist(handle);
824         if (!e) {
825                 pam_code = PAM_BUF_ERR;
826                 goto fail;
827         }
828
829         /* Block SIGTERM, so that we know that it won't get lost in
830          * the child */
831         if (sigemptyset(&ss) < 0 ||
832             sigaddset(&ss, SIGTERM) < 0 ||
833             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
834                 goto fail;
835
836         parent_pid = getpid();
837
838         pam_pid = fork();
839         if (pam_pid < 0)
840                 goto fail;
841
842         if (pam_pid == 0) {
843                 int sig;
844                 int r = EXIT_PAM;
845
846                 /* The child's job is to reset the PAM session on
847                  * termination */
848
849                 /* This string must fit in 10 chars (i.e. the length
850                  * of "/sbin/init"), to look pretty in /bin/ps */
851                 rename_process("(sd-pam)");
852
853                 /* Make sure we don't keep open the passed fds in this
854                 child. We assume that otherwise only those fds are
855                 open here that have been opened by PAM. */
856                 close_many(fds, n_fds);
857
858                 /* Drop privileges - we don't need any to pam_close_session
859                  * and this will make PR_SET_PDEATHSIG work in most cases.
860                  * If this fails, ignore the error - but expect sd-pam threads
861                  * to fail to exit normally */
862                 if (setresuid(uid, uid, uid) < 0)
863                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
864
865                 /* Wait until our parent died. This will only work if
866                  * the above setresuid() succeeds, otherwise the kernel
867                  * will not allow unprivileged parents kill their privileged
868                  * children this way. We rely on the control groups kill logic
869                  * to do the rest for us. */
870                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
871                         goto child_finish;
872
873                 /* Check if our parent process might already have
874                  * died? */
875                 if (getppid() == parent_pid) {
876                         for (;;) {
877                                 if (sigwait(&ss, &sig) < 0) {
878                                         if (errno == EINTR)
879                                                 continue;
880
881                                         goto child_finish;
882                                 }
883
884                                 assert(sig == SIGTERM);
885                                 break;
886                         }
887                 }
888
889                 /* If our parent died we'll end the session */
890                 if (getppid() != parent_pid) {
891                         pam_code = pam_close_session(handle, flags);
892                         if (pam_code != PAM_SUCCESS)
893                                 goto child_finish;
894                 }
895
896                 r = 0;
897
898         child_finish:
899                 pam_end(handle, pam_code | flags);
900                 _exit(r);
901         }
902
903         /* If the child was forked off successfully it will do all the
904          * cleanups, so forget about the handle here. */
905         handle = NULL;
906
907         /* Unblock SIGTERM again in the parent */
908         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
909                 goto fail;
910
911         /* We close the log explicitly here, since the PAM modules
912          * might have opened it, but we don't want this fd around. */
913         closelog();
914
915         *pam_env = e;
916         e = NULL;
917
918         return 0;
919
920 fail:
921         if (pam_code != PAM_SUCCESS) {
922                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
923                 err = -EPERM;  /* PAM errors do not map to errno */
924         } else {
925                 log_error_errno(errno, "PAM failed: %m");
926                 err = -errno;
927         }
928
929         if (handle) {
930                 if (close_session)
931                         pam_code = pam_close_session(handle, flags);
932
933                 pam_end(handle, pam_code | flags);
934         }
935
936         strv_free(e);
937
938         closelog();
939
940         if (pam_pid > 1) {
941                 kill(pam_pid, SIGTERM);
942                 kill(pam_pid, SIGCONT);
943         }
944
945         return err;
946 }
947 #endif
948
/* Renames the calling process to "(<name>)", where <name> is the basename
 * of path, cut down to its last 8 characters if it is longer. An empty
 * basename results in "(...)".
 *
 * The result is at most 10 characters long (the length of "/sbin/init"),
 * so that it looks pretty in /bin/ps. */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more telling part, as
                 * the beginning may be nothing but "systemd-" */
                base += len - 8;
                len = 8;
        }

        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = 0;

        rename_process(name);
}
979
980 #ifdef HAVE_SECCOMP
981
982 static int apply_seccomp(const ExecContext *c) {
983         uint32_t negative_action, action;
984         scmp_filter_ctx *seccomp;
985         Iterator i;
986         void *id;
987         int r;
988
989         assert(c);
990
991         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
992
993         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
994         if (!seccomp)
995                 return -ENOMEM;
996
997         if (c->syscall_archs) {
998
999                 SET_FOREACH(id, c->syscall_archs, i) {
1000                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1001                         if (r == -EEXIST)
1002                                 continue;
1003                         if (r < 0)
1004                                 goto finish;
1005                 }
1006
1007         } else {
1008                 r = seccomp_add_secondary_archs(seccomp);
1009                 if (r < 0)
1010                         goto finish;
1011         }
1012
1013         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1014         SET_FOREACH(id, c->syscall_filter, i) {
1015                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1016                 if (r < 0)
1017                         goto finish;
1018         }
1019
1020         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1021         if (r < 0)
1022                 goto finish;
1023
1024         r = seccomp_load(seccomp);
1025
1026 finish:
1027         seccomp_release(seccomp);
1028         return r;
1029 }
1030
1031 static int apply_address_families(const ExecContext *c) {
1032         scmp_filter_ctx *seccomp;
1033         Iterator i;
1034         int r;
1035
1036         assert(c);
1037
1038         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1039         if (!seccomp)
1040                 return -ENOMEM;
1041
1042         r = seccomp_add_secondary_archs(seccomp);
1043         if (r < 0)
1044                 goto finish;
1045
1046         if (c->address_families_whitelist) {
1047                 int af, first = 0, last = 0;
1048                 void *afp;
1049
1050                 /* If this is a whitelist, we first block the address
1051                  * families that are out of range and then everything
1052                  * that is not in the set. First, we find the lowest
1053                  * and highest address family in the set. */
1054
1055                 SET_FOREACH(afp, c->address_families, i) {
1056                         af = PTR_TO_INT(afp);
1057
1058                         if (af <= 0 || af >= af_max())
1059                                 continue;
1060
1061                         if (first == 0 || af < first)
1062                                 first = af;
1063
1064                         if (last == 0 || af > last)
1065                                 last = af;
1066                 }
1067
1068                 assert((first == 0) == (last == 0));
1069
1070                 if (first == 0) {
1071
1072                         /* No entries in the valid range, block everything */
1073                         r = seccomp_rule_add(
1074                                         seccomp,
1075                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076                                         SCMP_SYS(socket),
1077                                         0);
1078                         if (r < 0)
1079                                 goto finish;
1080
1081                 } else {
1082
1083                         /* Block everything below the first entry */
1084                         r = seccomp_rule_add(
1085                                         seccomp,
1086                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1087                                         SCMP_SYS(socket),
1088                                         1,
1089                                         SCMP_A0(SCMP_CMP_LT, first));
1090                         if (r < 0)
1091                                 goto finish;
1092
1093                         /* Block everything above the last entry */
1094                         r = seccomp_rule_add(
1095                                         seccomp,
1096                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097                                         SCMP_SYS(socket),
1098                                         1,
1099                                         SCMP_A0(SCMP_CMP_GT, last));
1100                         if (r < 0)
1101                                 goto finish;
1102
1103                         /* Block everything between the first and last
1104                          * entry */
1105                         for (af = 1; af < af_max(); af++) {
1106
1107                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1108                                         continue;
1109
1110                                 r = seccomp_rule_add(
1111                                                 seccomp,
1112                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1113                                                 SCMP_SYS(socket),
1114                                                 1,
1115                                                 SCMP_A0(SCMP_CMP_EQ, af));
1116                                 if (r < 0)
1117                                         goto finish;
1118                         }
1119                 }
1120
1121         } else {
1122                 void *af;
1123
1124                 /* If this is a blacklist, then generate one rule for
1125                  * each address family that are then combined in OR
1126                  * checks. */
1127
1128                 SET_FOREACH(af, c->address_families, i) {
1129
1130                         r = seccomp_rule_add(
1131                                         seccomp,
1132                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1133                                         SCMP_SYS(socket),
1134                                         1,
1135                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1136                         if (r < 0)
1137                                 goto finish;
1138                 }
1139         }
1140
1141         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1142         if (r < 0)
1143                 goto finish;
1144
1145         r = seccomp_load(seccomp);
1146
1147 finish:
1148         seccomp_release(seccomp);
1149         return r;
1150 }
1151
1152 #endif
1153
1154 static void do_idle_pipe_dance(int idle_pipe[4]) {
1155         assert(idle_pipe);
1156
1157
1158         safe_close(idle_pipe[1]);
1159         safe_close(idle_pipe[2]);
1160
1161         if (idle_pipe[0] >= 0) {
1162                 int r;
1163
1164                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1165
1166                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1167                         /* Signal systemd that we are bored and want to continue. */
1168                         write(idle_pipe[3], "x", 1);
1169
1170                         /* Wait for systemd to react to the signal above. */
1171                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1172                 }
1173
1174                 safe_close(idle_pipe[0]);
1175
1176         }
1177
1178         safe_close(idle_pipe[3]);
1179 }
1180
1181 static int build_environment(
1182                 const ExecContext *c,
1183                 unsigned n_fds,
1184                 usec_t watchdog_usec,
1185                 const char *home,
1186                 const char *username,
1187                 const char *shell,
1188                 char ***ret) {
1189
1190         _cleanup_strv_free_ char **our_env = NULL;
1191         unsigned n_env = 0;
1192         char *x;
1193
1194         assert(c);
1195         assert(ret);
1196
1197         our_env = new0(char*, 10);
1198         if (!our_env)
1199                 return -ENOMEM;
1200
1201         if (n_fds > 0) {
1202                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1203                         return -ENOMEM;
1204                 our_env[n_env++] = x;
1205
1206                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1207                         return -ENOMEM;
1208                 our_env[n_env++] = x;
1209         }
1210
1211         if (watchdog_usec > 0) {
1212                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215
1216                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         if (home) {
1222                 x = strappend("HOME=", home);
1223                 if (!x)
1224                         return -ENOMEM;
1225                 our_env[n_env++] = x;
1226         }
1227
1228         if (username) {
1229                 x = strappend("LOGNAME=", username);
1230                 if (!x)
1231                         return -ENOMEM;
1232                 our_env[n_env++] = x;
1233
1234                 x = strappend("USER=", username);
1235                 if (!x)
1236                         return -ENOMEM;
1237                 our_env[n_env++] = x;
1238         }
1239
1240         if (shell) {
1241                 x = strappend("SHELL=", shell);
1242                 if (!x)
1243                         return -ENOMEM;
1244                 our_env[n_env++] = x;
1245         }
1246
1247         if (is_terminal_input(c->std_input) ||
1248             c->std_output == EXEC_OUTPUT_TTY ||
1249             c->std_error == EXEC_OUTPUT_TTY ||
1250             c->tty_path) {
1251
1252                 x = strdup(default_term_for_tty(tty_path(c)));
1253                 if (!x)
1254                         return -ENOMEM;
1255                 our_env[n_env++] = x;
1256         }
1257
1258         our_env[n_env++] = NULL;
1259         assert(n_env <= 10);
1260
1261         *ret = our_env;
1262         our_env = NULL;
1263
1264         return 0;
1265 }
1266
1267 static int exec_child(
1268                 ExecCommand *command,
1269                 const ExecContext *context,
1270                 const ExecParameters *params,
1271                 ExecRuntime *runtime,
1272                 char **argv,
1273                 int socket_fd,
1274                 int *fds, unsigned n_fds,
1275                 char **files_env,
1276                 int *exit_status) {
1277
1278         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1279         _cleanup_free_ char *mac_selinux_context_net = NULL;
1280         const char *username = NULL, *home = NULL, *shell = NULL;
1281         unsigned n_dont_close = 0;
1282         int dont_close[n_fds + 4];
1283         uid_t uid = UID_INVALID;
1284         gid_t gid = GID_INVALID;
1285         int i, r;
1286
1287         assert(command);
1288         assert(context);
1289         assert(params);
1290         assert(exit_status);
1291
1292         rename_process_from_path(command->path);
1293
1294         /* We reset exactly these signals, since they are the
1295          * only ones we set to SIG_IGN in the main daemon. All
1296          * others we leave untouched because we set them to
1297          * SIG_DFL or a valid handler initially, both of which
1298          * will be demoted to SIG_DFL. */
1299         default_signals(SIGNALS_CRASH_HANDLER,
1300                         SIGNALS_IGNORE, -1);
1301
1302         if (context->ignore_sigpipe)
1303                 ignore_signals(SIGPIPE, -1);
1304
1305         r = reset_signal_mask();
1306         if (r < 0) {
1307                 *exit_status = EXIT_SIGNAL_MASK;
1308                 return r;
1309         }
1310
1311         if (params->idle_pipe)
1312                 do_idle_pipe_dance(params->idle_pipe);
1313
1314         /* Close sockets very early to make sure we don't
1315          * block init reexecution because it cannot bind its
1316          * sockets */
1317
1318         log_forget_fds();
1319
1320         if (socket_fd >= 0)
1321                 dont_close[n_dont_close++] = socket_fd;
1322         if (n_fds > 0) {
1323                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324                 n_dont_close += n_fds;
1325         }
1326         if (params->bus_endpoint_fd >= 0)
1327                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1328         if (runtime) {
1329                 if (runtime->netns_storage_socket[0] >= 0)
1330                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331                 if (runtime->netns_storage_socket[1] >= 0)
1332                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1333         }
1334
1335         r = close_all_fds(dont_close, n_dont_close);
1336         if (r < 0) {
1337                 *exit_status = EXIT_FDS;
1338                 return r;
1339         }
1340
1341         if (!context->same_pgrp)
1342                 if (setsid() < 0) {
1343                         *exit_status = EXIT_SETSID;
1344                         return -errno;
1345                 }
1346
1347         exec_context_tty_reset(context);
1348
1349         if (params->confirm_spawn) {
1350                 char response;
1351
1352                 r = ask_for_confirmation(&response, argv);
1353                 if (r == -ETIMEDOUT)
1354                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1355                 else if (r < 0)
1356                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1357                 else if (response == 's') {
1358                         write_confirm_message("Skipping execution.\n");
1359                         *exit_status = EXIT_CONFIRM;
1360                         return -ECANCELED;
1361                 } else if (response == 'n') {
1362                         write_confirm_message("Failing execution.\n");
1363                         *exit_status = 0;
1364                         return 0;
1365                 }
1366         }
1367
1368         if (context->user) {
1369                 username = context->user;
1370                 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                 if (r < 0) {
1372                         *exit_status = EXIT_USER;
1373                         return r;
1374                 }
1375         }
1376
1377         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378          * must sure to drop O_NONBLOCK */
1379         if (socket_fd >= 0)
1380                 fd_nonblock(socket_fd, false);
1381
1382         r = setup_input(context, socket_fd, params->apply_tty_stdin);
1383         if (r < 0) {
1384                 *exit_status = EXIT_STDIN;
1385                 return r;
1386         }
1387
1388         r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389         if (r < 0) {
1390                 *exit_status = EXIT_STDOUT;
1391                 return r;
1392         }
1393
1394         r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1395         if (r < 0) {
1396                 *exit_status = EXIT_STDERR;
1397                 return r;
1398         }
1399
1400         if (params->cgroup_path) {
1401                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1402                 if (r < 0) {
1403                         *exit_status = EXIT_CGROUP;
1404                         return r;
1405                 }
1406         }
1407
1408         if (context->oom_score_adjust_set) {
1409                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1410
1411                 /* When we can't make this change due to EPERM, then
1412                  * let's silently skip over it. User namespaces
1413                  * prohibit write access to this file, and we
1414                  * shouldn't trip up over that. */
1415
1416                 sprintf(t, "%i", context->oom_score_adjust);
1417                 r = write_string_file("/proc/self/oom_score_adj", t);
1418                 if (r == -EPERM || r == -EACCES) {
1419                         log_open();
1420                         log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1421                         log_close();
1422                 } else if (r < 0) {
1423                         *exit_status = EXIT_OOM_ADJUST;
1424                         return -errno;
1425                 }
1426         }
1427
1428         if (context->nice_set)
1429                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1430                         *exit_status = EXIT_NICE;
1431                         return -errno;
1432                 }
1433
1434         if (context->cpu_sched_set) {
1435                 struct sched_param param = {
1436                         .sched_priority = context->cpu_sched_priority,
1437                 };
1438
1439                 r = sched_setscheduler(0,
1440                                        context->cpu_sched_policy |
1441                                        (context->cpu_sched_reset_on_fork ?
1442                                         SCHED_RESET_ON_FORK : 0),
1443                                        &param);
1444                 if (r < 0) {
1445                         *exit_status = EXIT_SETSCHEDULER;
1446                         return -errno;
1447                 }
1448         }
1449
1450         if (context->cpuset)
1451                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1452                         *exit_status = EXIT_CPUAFFINITY;
1453                         return -errno;
1454                 }
1455
1456         if (context->ioprio_set)
1457                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1458                         *exit_status = EXIT_IOPRIO;
1459                         return -errno;
1460                 }
1461
1462         if (context->timer_slack_nsec != NSEC_INFINITY)
1463                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1464                         *exit_status = EXIT_TIMERSLACK;
1465                         return -errno;
1466                 }
1467
1468         if (context->personality != 0xffffffffUL)
1469                 if (personality(context->personality) < 0) {
1470                         *exit_status = EXIT_PERSONALITY;
1471                         return -errno;
1472                 }
1473
1474         if (context->utmp_id)
1475                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1476
1477         if (context->user && is_terminal_input(context->std_input)) {
1478                 r = chown_terminal(STDIN_FILENO, uid);
1479                 if (r < 0) {
1480                         *exit_status = EXIT_STDIN;
1481                         return r;
1482                 }
1483         }
1484
1485 #ifdef ENABLE_KDBUS
1486         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1487                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1488
1489                 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1490                 if (r < 0) {
1491                         *exit_status = EXIT_BUS_ENDPOINT;
1492                         return r;
1493                 }
1494         }
1495 #endif
1496
1497         /* If delegation is enabled we'll pass ownership of the cgroup
1498          * (but only in systemd's own controller hierarchy!) to the
1499          * user of the new process. */
1500         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1501                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1502                 if (r < 0) {
1503                         *exit_status = EXIT_CGROUP;
1504                         return r;
1505                 }
1506
1507
1508                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1509                 if (r < 0) {
1510                         *exit_status = EXIT_CGROUP;
1511                         return r;
1512                 }
1513         }
1514
1515         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1516                 char **rt;
1517
1518                 STRV_FOREACH(rt, context->runtime_directory) {
1519                         _cleanup_free_ char *p;
1520
1521                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1522                         if (!p) {
1523                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1524                                 return -ENOMEM;
1525                         }
1526
1527                         r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1528                         if (r < 0) {
1529                                 *exit_status = EXIT_RUNTIME_DIRECTORY;
1530                                 return r;
1531                         }
1532                 }
1533         }
1534
1535         if (params->apply_permissions) {
1536                 r = enforce_groups(context, username, gid);
1537                 if (r < 0) {
1538                         *exit_status = EXIT_GROUP;
1539                         return r;
1540                 }
1541         }
1542
1543         umask(context->umask);
1544
1545 #ifdef HAVE_PAM
1546         if (params->apply_permissions && context->pam_name && username) {
1547                 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1548                 if (r < 0) {
1549                         *exit_status = EXIT_PAM;
1550                         return r;
1551                 }
1552         }
1553 #endif
1554
1555         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1556                 r = setup_netns(runtime->netns_storage_socket);
1557                 if (r < 0) {
1558                         *exit_status = EXIT_NETWORK;
1559                         return r;
1560                 }
1561         }
1562
1563         if (!strv_isempty(context->read_write_dirs) ||
1564             !strv_isempty(context->read_only_dirs) ||
1565             !strv_isempty(context->inaccessible_dirs) ||
1566             context->mount_flags != 0 ||
1567             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1568             params->bus_endpoint_path ||
1569             context->private_devices ||
1570             context->protect_system != PROTECT_SYSTEM_NO ||
1571             context->protect_home != PROTECT_HOME_NO) {
1572
1573                 char *tmp = NULL, *var = NULL;
1574
1575                 /* The runtime struct only contains the parent
1576                  * of the private /tmp, which is
1577                  * non-accessible to world users. Inside of it
1578                  * there's a /tmp that is sticky, and that's
1579                  * the one we want to use here. */
1580
1581                 if (context->private_tmp && runtime) {
1582                         if (runtime->tmp_dir)
1583                                 tmp = strjoina(runtime->tmp_dir, "/tmp");
1584                         if (runtime->var_tmp_dir)
1585                                 var = strjoina(runtime->var_tmp_dir, "/tmp");
1586                 }
1587
1588                 r = setup_namespace(
1589                                 context->read_write_dirs,
1590                                 context->read_only_dirs,
1591                                 context->inaccessible_dirs,
1592                                 tmp,
1593                                 var,
1594                                 params->bus_endpoint_path,
1595                                 context->private_devices,
1596                                 context->protect_home,
1597                                 context->protect_system,
1598                                 context->mount_flags);
1599
1600                 /* If we couldn't set up the namespace this is
1601                  * probably due to a missing capability. In this case,
1602                  * silently proceeed. */
1603                 if (r == -EPERM || r == -EACCES) {
1604                         log_open();
1605                         log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1606                         log_close();
1607                 } else if (r < 0) {
1608                         *exit_status = EXIT_NAMESPACE;
1609                         return r;
1610                 }
1611         }
1612
1613         if (params->apply_chroot) {
1614                 if (context->root_directory)
1615                         if (chroot(context->root_directory) < 0) {
1616                                 *exit_status = EXIT_CHROOT;
1617                                 return -errno;
1618                         }
1619
1620                 if (chdir(context->working_directory ?: "/") < 0 &&
1621                     !context->working_directory_missing_ok) {
1622                         *exit_status = EXIT_CHDIR;
1623                         return -errno;
1624                 }
1625         } else {
1626                 _cleanup_free_ char *d = NULL;
1627
1628                 if (asprintf(&d, "%s/%s",
1629                              context->root_directory ? context->root_directory : "",
1630                              context->working_directory ? context->working_directory : "") < 0) {
1631                         *exit_status = EXIT_MEMORY;
1632                         return -ENOMEM;
1633                 }
1634
1635                 if (chdir(d) < 0) {
1636                         *exit_status = EXIT_CHDIR;
1637                         return -errno;
1638                 }
1639         }
1640
1641 #ifdef HAVE_SELINUX
1642         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1643                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1644                 if (r < 0) {
1645                         *exit_status = EXIT_SELINUX_CONTEXT;
1646                         return r;
1647                 }
1648         }
1649 #endif
1650
1651         /* We repeat the fd closing here, to make sure that
1652          * nothing is leaked from the PAM modules. Note that
1653          * we are more aggressive this time since socket_fd
1654          * and the netns fds we don't need anymore. The custom
1655          * endpoint fd was needed to upload the policy and can
1656          * now be closed as well. */
1657         r = close_all_fds(fds, n_fds);
1658         if (r >= 0)
1659                 r = shift_fds(fds, n_fds);
1660         if (r >= 0)
1661                 r = flags_fds(fds, n_fds, context->non_blocking);
1662         if (r < 0) {
1663                 *exit_status = EXIT_FDS;
1664                 return r;
1665         }
1666
1667         if (params->apply_permissions) {
1668
1669                 for (i = 0; i < _RLIMIT_MAX; i++) {
1670                         if (!context->rlimit[i])
1671                                 continue;
1672
1673                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1674                                 *exit_status = EXIT_LIMITS;
1675                                 return -errno;
1676                         }
1677                 }
1678
1679                 if (context->capability_bounding_set_drop) {
1680                         r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1681                         if (r < 0) {
1682                                 *exit_status = EXIT_CAPABILITIES;
1683                                 return r;
1684                         }
1685                 }
1686
1687 #ifdef HAVE_SMACK
1688                 if (context->smack_process_label) {
1689                         r = mac_smack_apply_pid(0, context->smack_process_label);
1690                         if (r < 0) {
1691                                 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1692                                 return r;
1693                         }
1694                 }
1695 #endif
1696
1697                 if (context->user) {
1698                         r = enforce_user(context, uid);
1699                         if (r < 0) {
1700                                 *exit_status = EXIT_USER;
1701                                 return r;
1702                         }
1703                 }
1704
1705                 /* PR_GET_SECUREBITS is not privileged, while
1706                  * PR_SET_SECUREBITS is. So to suppress
1707                  * potential EPERMs we'll try not to call
1708                  * PR_SET_SECUREBITS unless necessary. */
1709                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1710                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1711                                 *exit_status = EXIT_SECUREBITS;
1712                                 return -errno;
1713                         }
1714
1715                 if (context->capabilities)
1716                         if (cap_set_proc(context->capabilities) < 0) {
1717                                 *exit_status = EXIT_CAPABILITIES;
1718                                 return -errno;
1719                         }
1720
1721                 if (context->no_new_privileges)
1722                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1723                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1724                                 return -errno;
1725                         }
1726
1727 #ifdef HAVE_SECCOMP
1728                 if (context->address_families_whitelist ||
1729                     !set_isempty(context->address_families)) {
1730                         r = apply_address_families(context);
1731                         if (r < 0) {
1732                                 *exit_status = EXIT_ADDRESS_FAMILIES;
1733                                 return r;
1734                         }
1735                 }
1736
1737                 if (context->syscall_whitelist ||
1738                     !set_isempty(context->syscall_filter) ||
1739                     !set_isempty(context->syscall_archs)) {
1740                         r = apply_seccomp(context);
1741                         if (r < 0) {
1742                                 *exit_status = EXIT_SECCOMP;
1743                                 return r;
1744                         }
1745                 }
1746 #endif
1747
1748 #ifdef HAVE_SELINUX
1749                 if (mac_selinux_use()) {
1750                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1751
1752                         if (exec_context) {
1753                                 r = setexeccon(exec_context);
1754                                 if (r < 0) {
1755                                         *exit_status = EXIT_SELINUX_CONTEXT;
1756                                         return r;
1757                                 }
1758                         }
1759                 }
1760 #endif
1761
1762 #ifdef HAVE_APPARMOR
1763                 if (context->apparmor_profile && mac_apparmor_use()) {
1764                         r = aa_change_onexec(context->apparmor_profile);
1765                         if (r < 0 && !context->apparmor_profile_ignore) {
1766                                 *exit_status = EXIT_APPARMOR_PROFILE;
1767                                 return -errno;
1768                         }
1769                 }
1770 #endif
1771         }
1772
1773         r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1774         if (r < 0) {
1775                 *exit_status = EXIT_MEMORY;
1776                 return r;
1777         }
1778
1779         final_env = strv_env_merge(5,
1780                                    params->environment,
1781                                    our_env,
1782                                    context->environment,
1783                                    files_env,
1784                                    pam_env,
1785                                    NULL);
1786         if (!final_env) {
1787                 *exit_status = EXIT_MEMORY;
1788                 return -ENOMEM;
1789         }
1790
1791         final_argv = replace_env_argv(argv, final_env);
1792         if (!final_argv) {
1793                 *exit_status = EXIT_MEMORY;
1794                 return -ENOMEM;
1795         }
1796
1797         final_env = strv_env_clean(final_env);
1798
1799         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1800                 _cleanup_free_ char *line;
1801
1802                 line = exec_command_line(final_argv);
1803                 if (line) {
1804                         log_open();
1805                         log_unit_struct(params->unit_id,
1806                                         LOG_DEBUG,
1807                                         "EXECUTABLE=%s", command->path,
1808                                         LOG_MESSAGE("Executing: %s", line),
1809                                         NULL);
1810                         log_close();
1811                 }
1812         }
1813         execve(command->path, final_argv, final_env);
1814         *exit_status = EXIT_EXEC;
1815         return -errno;
1816 }
1817
1818 int exec_spawn(ExecCommand *command,
1819                const ExecContext *context,
1820                const ExecParameters *params,
1821                ExecRuntime *runtime,
1822                pid_t *ret) {
1823
1824         _cleanup_strv_free_ char **files_env = NULL;
1825         int *fds = NULL; unsigned n_fds = 0;
1826         _cleanup_free_ char *line = NULL;
1827         int socket_fd, r;
1828         char **argv;
1829         pid_t pid;
1830
1831         assert(command);
1832         assert(context);
1833         assert(ret);
1834         assert(params);
1835         assert(params->fds || params->n_fds <= 0);
1836
1837         if (context->std_input == EXEC_INPUT_SOCKET ||
1838             context->std_output == EXEC_OUTPUT_SOCKET ||
1839             context->std_error == EXEC_OUTPUT_SOCKET) {
1840
1841                 if (params->n_fds != 1) {
1842                         log_unit_error(params->unit_id, "Got more than one socket.");
1843                         return -EINVAL;
1844                 }
1845
1846                 socket_fd = params->fds[0];
1847         } else {
1848                 socket_fd = -1;
1849                 fds = params->fds;
1850                 n_fds = params->n_fds;
1851         }
1852
1853         r = exec_context_load_environment(context, params->unit_id, &files_env);
1854         if (r < 0)
1855                 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1856
1857         argv = params->argv ?: command->argv;
1858         line = exec_command_line(argv);
1859         if (!line)
1860                 return log_oom();
1861
1862         log_unit_struct(params->unit_id,
1863                         LOG_DEBUG,
1864                         "EXECUTABLE=%s", command->path,
1865                         LOG_MESSAGE("About to execute: %s", line),
1866                         NULL);
1867         pid = fork();
1868         if (pid < 0)
1869                 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1870
1871         if (pid == 0) {
1872                 int exit_status;
1873
1874                 r = exec_child(command,
1875                                context,
1876                                params,
1877                                runtime,
1878                                argv,
1879                                socket_fd,
1880                                fds, n_fds,
1881                                files_env,
1882                                &exit_status);
1883                 if (r < 0) {
1884                         log_open();
1885                         log_unit_struct(params->unit_id,
1886                                         LOG_ERR,
1887                                         LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1888                                         "EXECUTABLE=%s", command->path,
1889                                         LOG_MESSAGE("Failed at step %s spawning %s: %s",
1890                                                     exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1891                                                     command->path, strerror(-r)),
1892                                         LOG_ERRNO(r),
1893                                         NULL);
1894                 }
1895
1896                 _exit(exit_status);
1897         }
1898
1899         log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1900
1901         /* We add the new process to the cgroup both in the child (so
1902          * that we can be sure that no user code is ever executed
1903          * outside of the cgroup) and in the parent (so that we can be
1904          * sure that when we kill the cgroup the process will be
1905          * killed too). */
1906         if (params->cgroup_path)
1907                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1908
1909         exec_status_start(&command->exec_status, pid);
1910
1911         *ret = pid;
1912         return 0;
1913 }
1914
1915 void exec_context_init(ExecContext *c) {
1916         assert(c);
1917
1918         c->umask = 0022;
1919         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1920         c->cpu_sched_policy = SCHED_OTHER;
1921         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1922         c->syslog_level_prefix = true;
1923         c->ignore_sigpipe = true;
1924         c->timer_slack_nsec = NSEC_INFINITY;
1925         c->personality = 0xffffffffUL;
1926         c->runtime_directory_mode = 0755;
1927 }
1928
1929 void exec_context_done(ExecContext *c) {
1930         unsigned l;
1931
1932         assert(c);
1933
1934         strv_free(c->environment);
1935         c->environment = NULL;
1936
1937         strv_free(c->environment_files);
1938         c->environment_files = NULL;
1939
1940         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1941                 free(c->rlimit[l]);
1942                 c->rlimit[l] = NULL;
1943         }
1944
1945         free(c->working_directory);
1946         c->working_directory = NULL;
1947         free(c->root_directory);
1948         c->root_directory = NULL;
1949
1950         free(c->tty_path);
1951         c->tty_path = NULL;
1952
1953         free(c->syslog_identifier);
1954         c->syslog_identifier = NULL;
1955
1956         free(c->user);
1957         c->user = NULL;
1958
1959         free(c->group);
1960         c->group = NULL;
1961
1962         strv_free(c->supplementary_groups);
1963         c->supplementary_groups = NULL;
1964
1965         free(c->pam_name);
1966         c->pam_name = NULL;
1967
1968         if (c->capabilities) {
1969                 cap_free(c->capabilities);
1970                 c->capabilities = NULL;
1971         }
1972
1973         strv_free(c->read_only_dirs);
1974         c->read_only_dirs = NULL;
1975
1976         strv_free(c->read_write_dirs);
1977         c->read_write_dirs = NULL;
1978
1979         strv_free(c->inaccessible_dirs);
1980         c->inaccessible_dirs = NULL;
1981
1982         if (c->cpuset)
1983                 CPU_FREE(c->cpuset);
1984
1985         free(c->utmp_id);
1986         c->utmp_id = NULL;
1987
1988         free(c->selinux_context);
1989         c->selinux_context = NULL;
1990
1991         free(c->apparmor_profile);
1992         c->apparmor_profile = NULL;
1993
1994         set_free(c->syscall_filter);
1995         c->syscall_filter = NULL;
1996
1997         set_free(c->syscall_archs);
1998         c->syscall_archs = NULL;
1999
2000         set_free(c->address_families);
2001         c->address_families = NULL;
2002
2003         strv_free(c->runtime_directory);
2004         c->runtime_directory = NULL;
2005
2006         bus_endpoint_free(c->bus_endpoint);
2007         c->bus_endpoint = NULL;
2008 }
2009
2010 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2011         char **i;
2012
2013         assert(c);
2014
2015         if (!runtime_prefix)
2016                 return 0;
2017
2018         STRV_FOREACH(i, c->runtime_directory) {
2019                 _cleanup_free_ char *p;
2020
2021                 p = strjoin(runtime_prefix, "/", *i, NULL);
2022                 if (!p)
2023                         return -ENOMEM;
2024
2025                 /* We execute this synchronously, since we need to be
2026                  * sure this is gone when we start the service
2027                  * next. */
2028                 rm_rf(p, false, true, false);
2029         }
2030
2031         return 0;
2032 }
2033
2034 void exec_command_done(ExecCommand *c) {
2035         assert(c);
2036
2037         free(c->path);
2038         c->path = NULL;
2039
2040         strv_free(c->argv);
2041         c->argv = NULL;
2042 }
2043
2044 void exec_command_done_array(ExecCommand *c, unsigned n) {
2045         unsigned i;
2046
2047         for (i = 0; i < n; i++)
2048                 exec_command_done(c+i);
2049 }
2050
2051 ExecCommand* exec_command_free_list(ExecCommand *c) {
2052         ExecCommand *i;
2053
2054         while ((i = c)) {
2055                 LIST_REMOVE(command, c, i);
2056                 exec_command_done(i);
2057                 free(i);
2058         }
2059
2060         return NULL;
2061 }
2062
2063 void exec_command_free_array(ExecCommand **c, unsigned n) {
2064         unsigned i;
2065
2066         for (i = 0; i < n; i++)
2067                 c[i] = exec_command_free_list(c[i]);
2068 }
2069
/* Context handed to invalid_env() as userdata, so that the message
 * about a rejected environment assignment can name its origin. */
typedef struct InvalidEnvInfo {
        const char *unit_id;    /* unit the message is logged for */
        const char *path;       /* file the invalid assignment was read from */
} InvalidEnvInfo;
2074
2075 static void invalid_env(const char *p, void *userdata) {
2076         InvalidEnvInfo *info = userdata;
2077
2078         log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2079 }
2080
/* Loads all environment files configured in the context and merges
 * their assignments into a single string vector.
 *
 * Each entry of c->environment_files is an absolute path that may
 * contain glob patterns. If the entry is prefixed with '-', failures
 * regarding this entry (non-absolute path, glob failure, no match,
 * unreadable file) are silently skipped; otherwise they abort the
 * operation.
 *
 * On success returns 0 and stores the merged vector in *l (NULL if no
 * assignments were loaded). On failure returns a negative errno-style
 * error code; *l is not written in that case. */
int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
        char **i, **r = NULL;

        assert(c);
        assert(l);

        STRV_FOREACH(i, c->environment_files) {
                char *fn;
                int k;
                bool ignore = false;
                char **p;
                _cleanup_globfree_ glob_t pglob = {};
                int count, n;

                fn = *i;

                /* A leading '-' marks the file as optional */
                if (fn[0] == '-') {
                        ignore = true;
                        fn ++;
                }

                if (!path_is_absolute(fn)) {
                        if (ignore)
                                continue;

                        strv_free(r);
                        return -EINVAL;
                }

                /* Filename supports globbing, take all matching files */
                errno = 0;
                if (glob(fn, 0, NULL, &pglob) != 0) {
                        if (ignore)
                                continue;

                        strv_free(r);
                        /* errno was cleared above; fall back to -EINVAL
                         * if glob() failed without setting it */
                        return errno ? -errno : -EINVAL;
                }
                count = pglob.gl_pathc;
                if (count == 0) {
                        if (ignore)
                                continue;

                        strv_free(r);
                        return -EINVAL;
                }
                for (n = 0; n < count; n++) {
                        k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
                        if (k < 0) {
                                if (ignore)
                                        continue;

                                strv_free(r);
                                return k;
                        }
                        /* Log invalid environment variables with filename */
                        if (p) {
                                InvalidEnvInfo info = {
                                        .unit_id = unit_id,
                                        .path = pglob.gl_pathv[n]
                                };

                                p = strv_env_clean_with_callback(p, invalid_env, &info);
                        }

                        /* The first loaded file is taken over as is, all
                         * further ones are merged into what we have so far */
                        if (r == NULL)
                                r = p;
                        else {
                                char **m;

                                m = strv_env_merge(2, r, p);
                                strv_free(r);
                                strv_free(p);
                                if (!m)
                                        return -ENOMEM;

                                r = m;
                        }
                }
        }

        *l = r;

        return 0;
}
2166
2167 static bool tty_may_match_dev_console(const char *tty) {
2168         _cleanup_free_ char *active = NULL;
2169        char *console;
2170
2171         if (startswith(tty, "/dev/"))
2172                 tty += 5;
2173
2174         /* trivial identity? */
2175         if (streq(tty, "console"))
2176                 return true;
2177
2178         console = resolve_dev_console(&active);
2179         /* if we could not resolve, assume it may */
2180         if (!console)
2181                 return true;
2182
2183         /* "tty0" means the active VC, so it may be the same sometimes */
2184         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2185 }
2186
2187 bool exec_context_may_touch_console(ExecContext *ec) {
2188         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2189                 is_terminal_input(ec->std_input) ||
2190                 is_terminal_output(ec->std_output) ||
2191                 is_terminal_output(ec->std_error)) &&
2192                tty_may_match_dev_console(tty_path(ec));
2193 }
2194
2195 static void strv_fprintf(FILE *f, char **l) {
2196         char **g;
2197
2198         assert(f);
2199
2200         STRV_FOREACH(g, l)
2201                 fprintf(f, " %s", *g);
2202 }
2203
2204 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2205         char **e;
2206         unsigned i;
2207
2208         assert(c);
2209         assert(f);
2210
2211         prefix = strempty(prefix);
2212
2213         fprintf(f,
2214                 "%sUMask: %04o\n"
2215                 "%sWorkingDirectory: %s\n"
2216                 "%sRootDirectory: %s\n"
2217                 "%sNonBlocking: %s\n"
2218                 "%sPrivateTmp: %s\n"
2219                 "%sPrivateNetwork: %s\n"
2220                 "%sPrivateDevices: %s\n"
2221                 "%sProtectHome: %s\n"
2222                 "%sProtectSystem: %s\n"
2223                 "%sIgnoreSIGPIPE: %s\n",
2224                 prefix, c->umask,
2225                 prefix, c->working_directory ? c->working_directory : "/",
2226                 prefix, c->root_directory ? c->root_directory : "/",
2227                 prefix, yes_no(c->non_blocking),
2228                 prefix, yes_no(c->private_tmp),
2229                 prefix, yes_no(c->private_network),
2230                 prefix, yes_no(c->private_devices),
2231                 prefix, protect_home_to_string(c->protect_home),
2232                 prefix, protect_system_to_string(c->protect_system),
2233                 prefix, yes_no(c->ignore_sigpipe));
2234
2235         STRV_FOREACH(e, c->environment)
2236                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2237
2238         STRV_FOREACH(e, c->environment_files)
2239                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2240
2241         if (c->nice_set)
2242                 fprintf(f,
2243                         "%sNice: %i\n",
2244                         prefix, c->nice);
2245
2246         if (c->oom_score_adjust_set)
2247                 fprintf(f,
2248                         "%sOOMScoreAdjust: %i\n",
2249                         prefix, c->oom_score_adjust);
2250
2251         for (i = 0; i < RLIM_NLIMITS; i++)
2252                 if (c->rlimit[i])
2253                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2254                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2255
2256         if (c->ioprio_set) {
2257                 _cleanup_free_ char *class_str = NULL;
2258
2259                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2260                 fprintf(f,
2261                         "%sIOSchedulingClass: %s\n"
2262                         "%sIOPriority: %i\n",
2263                         prefix, strna(class_str),
2264                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2265         }
2266
2267         if (c->cpu_sched_set) {
2268                 _cleanup_free_ char *policy_str = NULL;
2269
2270                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2271                 fprintf(f,
2272                         "%sCPUSchedulingPolicy: %s\n"
2273                         "%sCPUSchedulingPriority: %i\n"
2274                         "%sCPUSchedulingResetOnFork: %s\n",
2275                         prefix, strna(policy_str),
2276                         prefix, c->cpu_sched_priority,
2277                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2278         }
2279
2280         if (c->cpuset) {
2281                 fprintf(f, "%sCPUAffinity:", prefix);
2282                 for (i = 0; i < c->cpuset_ncpus; i++)
2283                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2284                                 fprintf(f, " %u", i);
2285                 fputs("\n", f);
2286         }
2287
2288         if (c->timer_slack_nsec != NSEC_INFINITY)
2289                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2290
2291         fprintf(f,
2292                 "%sStandardInput: %s\n"
2293                 "%sStandardOutput: %s\n"
2294                 "%sStandardError: %s\n",
2295                 prefix, exec_input_to_string(c->std_input),
2296                 prefix, exec_output_to_string(c->std_output),
2297                 prefix, exec_output_to_string(c->std_error));
2298
2299         if (c->tty_path)
2300                 fprintf(f,
2301                         "%sTTYPath: %s\n"
2302                         "%sTTYReset: %s\n"
2303                         "%sTTYVHangup: %s\n"
2304                         "%sTTYVTDisallocate: %s\n",
2305                         prefix, c->tty_path,
2306                         prefix, yes_no(c->tty_reset),
2307                         prefix, yes_no(c->tty_vhangup),
2308                         prefix, yes_no(c->tty_vt_disallocate));
2309
2310         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2311             c->std_output == EXEC_OUTPUT_KMSG ||
2312             c->std_output == EXEC_OUTPUT_JOURNAL ||
2313             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2314             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2315             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2316             c->std_error == EXEC_OUTPUT_SYSLOG ||
2317             c->std_error == EXEC_OUTPUT_KMSG ||
2318             c->std_error == EXEC_OUTPUT_JOURNAL ||
2319             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2320             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2321             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2322
2323                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2324
2325                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2326                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2327
2328                 fprintf(f,
2329                         "%sSyslogFacility: %s\n"
2330                         "%sSyslogLevel: %s\n",
2331                         prefix, strna(fac_str),
2332                         prefix, strna(lvl_str));
2333         }
2334
2335         if (c->capabilities) {
2336                 _cleanup_cap_free_charp_ char *t;
2337
2338                 t = cap_to_text(c->capabilities, NULL);
2339                 if (t)
2340                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2341         }
2342
2343         if (c->secure_bits)
2344                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2345                         prefix,
2346                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2347                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2348                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2349                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2350                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2351                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2352
2353         if (c->capability_bounding_set_drop) {
2354                 unsigned long l;
2355                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2356
2357                 for (l = 0; l <= cap_last_cap(); l++)
2358                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2359                                 fprintf(f, " %s", strna(capability_to_name(l)));
2360
2361                 fputs("\n", f);
2362         }
2363
2364         if (c->user)
2365                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2366         if (c->group)
2367                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2368
2369         if (strv_length(c->supplementary_groups) > 0) {
2370                 fprintf(f, "%sSupplementaryGroups:", prefix);
2371                 strv_fprintf(f, c->supplementary_groups);
2372                 fputs("\n", f);
2373         }
2374
2375         if (c->pam_name)
2376                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2377
2378         if (strv_length(c->read_write_dirs) > 0) {
2379                 fprintf(f, "%sReadWriteDirs:", prefix);
2380                 strv_fprintf(f, c->read_write_dirs);
2381                 fputs("\n", f);
2382         }
2383
2384         if (strv_length(c->read_only_dirs) > 0) {
2385                 fprintf(f, "%sReadOnlyDirs:", prefix);
2386                 strv_fprintf(f, c->read_only_dirs);
2387                 fputs("\n", f);
2388         }
2389
2390         if (strv_length(c->inaccessible_dirs) > 0) {
2391                 fprintf(f, "%sInaccessibleDirs:", prefix);
2392                 strv_fprintf(f, c->inaccessible_dirs);
2393                 fputs("\n", f);
2394         }
2395
2396         if (c->utmp_id)
2397                 fprintf(f,
2398                         "%sUtmpIdentifier: %s\n",
2399                         prefix, c->utmp_id);
2400
2401         if (c->selinux_context)
2402                 fprintf(f,
2403                         "%sSELinuxContext: %s%s\n",
2404                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2405
2406         if (c->personality != 0xffffffffUL)
2407                 fprintf(f,
2408                         "%sPersonality: %s\n",
2409                         prefix, strna(personality_to_string(c->personality)));
2410
2411         if (c->syscall_filter) {
2412 #ifdef HAVE_SECCOMP
2413                 Iterator j;
2414                 void *id;
2415                 bool first = true;
2416 #endif
2417
2418                 fprintf(f,
2419                         "%sSystemCallFilter: ",
2420                         prefix);
2421
2422                 if (!c->syscall_whitelist)
2423                         fputc('~', f);
2424
2425 #ifdef HAVE_SECCOMP
2426                 SET_FOREACH(id, c->syscall_filter, j) {
2427                         _cleanup_free_ char *name = NULL;
2428
2429                         if (first)
2430                                 first = false;
2431                         else
2432                                 fputc(' ', f);
2433
2434                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2435                         fputs(strna(name), f);
2436                 }
2437 #endif
2438
2439                 fputc('\n', f);
2440         }
2441
2442         if (c->syscall_archs) {
2443 #ifdef HAVE_SECCOMP
2444                 Iterator j;
2445                 void *id;
2446 #endif
2447
2448                 fprintf(f,
2449                         "%sSystemCallArchitectures:",
2450                         prefix);
2451
2452 #ifdef HAVE_SECCOMP
2453                 SET_FOREACH(id, c->syscall_archs, j)
2454                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2455 #endif
2456                 fputc('\n', f);
2457         }
2458
2459         if (c->syscall_errno != 0)
2460                 fprintf(f,
2461                         "%sSystemCallErrorNumber: %s\n",
2462                         prefix, strna(errno_to_name(c->syscall_errno)));
2463
2464         if (c->apparmor_profile)
2465                 fprintf(f,
2466                         "%sAppArmorProfile: %s%s\n",
2467                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2468 }
2469
2470 bool exec_context_maintains_privileges(ExecContext *c) {
2471         assert(c);
2472
2473         /* Returns true if the process forked off would run run under
2474          * an unchanged UID or as root. */
2475
2476         if (!c->user)
2477                 return true;
2478
2479         if (streq(c->user, "root") || streq(c->user, "0"))
2480                 return true;
2481
2482         return false;
2483 }
2484
2485 void exec_status_start(ExecStatus *s, pid_t pid) {
2486         assert(s);
2487
2488         zero(*s);
2489         s->pid = pid;
2490         dual_timestamp_get(&s->start_timestamp);
2491 }
2492
2493 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2494         assert(s);
2495
2496         if (s->pid && s->pid != pid)
2497                 zero(*s);
2498
2499         s->pid = pid;
2500         dual_timestamp_get(&s->exit_timestamp);
2501
2502         s->code = code;
2503         s->status = status;
2504
2505         if (context) {
2506                 if (context->utmp_id)
2507                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2508
2509                 exec_context_tty_reset(context);
2510         }
2511 }
2512
2513 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2514         char buf[FORMAT_TIMESTAMP_MAX];
2515
2516         assert(s);
2517         assert(f);
2518
2519         if (s->pid <= 0)
2520                 return;
2521
2522         prefix = strempty(prefix);
2523
2524         fprintf(f,
2525                 "%sPID: "PID_FMT"\n",
2526                 prefix, s->pid);
2527
2528         if (s->start_timestamp.realtime > 0)
2529                 fprintf(f,
2530                         "%sStart Timestamp: %s\n",
2531                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2532
2533         if (s->exit_timestamp.realtime > 0)
2534                 fprintf(f,
2535                         "%sExit Timestamp: %s\n"
2536                         "%sExit Code: %s\n"
2537                         "%sExit Status: %i\n",
2538                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2539                         prefix, sigchld_code_to_string(s->code),
2540                         prefix, s->status);
2541 }
2542
2543 char *exec_command_line(char **argv) {
2544         size_t k;
2545         char *n, *p, **a;
2546         bool first = true;
2547
2548         assert(argv);
2549
2550         k = 1;
2551         STRV_FOREACH(a, argv)
2552                 k += strlen(*a)+3;
2553
2554         if (!(n = new(char, k)))
2555                 return NULL;
2556
2557         p = n;
2558         STRV_FOREACH(a, argv) {
2559
2560                 if (!first)
2561                         *(p++) = ' ';
2562                 else
2563                         first = false;
2564
2565                 if (strpbrk(*a, WHITESPACE)) {
2566                         *(p++) = '\'';
2567                         p = stpcpy(p, *a);
2568                         *(p++) = '\'';
2569                 } else
2570                         p = stpcpy(p, *a);
2571
2572         }
2573
2574         *p = 0;
2575
2576         /* FIXME: this doesn't really handle arguments that have
2577          * spaces and ticks in them */
2578
2579         return n;
2580 }
2581
2582 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2583         _cleanup_free_ char *cmd = NULL;
2584         const char *prefix2;
2585
2586         assert(c);
2587         assert(f);
2588
2589         prefix = strempty(prefix);
2590         prefix2 = strjoina(prefix, "\t");
2591
2592         cmd = exec_command_line(c->argv);
2593         fprintf(f,
2594                 "%sCommand Line: %s\n",
2595                 prefix, cmd ? cmd : strerror(ENOMEM));
2596
2597         exec_status_dump(&c->exec_status, f, prefix2);
2598 }
2599
2600 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2601         assert(f);
2602
2603         prefix = strempty(prefix);
2604
2605         LIST_FOREACH(command, c, c)
2606                 exec_command_dump(c, f, prefix);
2607 }
2608
2609 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2610         ExecCommand *end;
2611
2612         assert(l);
2613         assert(e);
2614
2615         if (*l) {
2616                 /* It's kind of important, that we keep the order here */
2617                 LIST_FIND_TAIL(command, *l, end);
2618                 LIST_INSERT_AFTER(command, *l, end, e);
2619         } else
2620               *l = e;
2621 }
2622
2623 int exec_command_set(ExecCommand *c, const char *path, ...) {
2624         va_list ap;
2625         char **l, *p;
2626
2627         assert(c);
2628         assert(path);
2629
2630         va_start(ap, path);
2631         l = strv_new_ap(path, ap);
2632         va_end(ap);
2633
2634         if (!l)
2635                 return -ENOMEM;
2636
2637         p = strdup(path);
2638         if (!p) {
2639                 strv_free(l);
2640                 return -ENOMEM;
2641         }
2642
2643         free(c->path);
2644         c->path = p;
2645
2646         strv_free(c->argv);
2647         c->argv = l;
2648
2649         return 0;
2650 }
2651
2652 int exec_command_append(ExecCommand *c, const char *path, ...) {
2653         _cleanup_strv_free_ char **l = NULL;
2654         va_list ap;
2655         int r;
2656
2657         assert(c);
2658         assert(path);
2659
2660         va_start(ap, path);
2661         l = strv_new_ap(path, ap);
2662         va_end(ap);
2663
2664         if (!l)
2665                 return -ENOMEM;
2666
2667         r = strv_extend_strv(&c->argv, l);
2668         if (r < 0)
2669                 return r;
2670
2671         return 0;
2672 }
2673
2674
2675 static int exec_runtime_allocate(ExecRuntime **rt) {
2676
2677         if (*rt)
2678                 return 0;
2679
2680         *rt = new0(ExecRuntime, 1);
2681         if (!*rt)
2682                 return -ENOMEM;
2683
2684         (*rt)->n_ref = 1;
2685         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2686
2687         return 0;
2688 }
2689
2690 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2691         int r;
2692
2693         assert(rt);
2694         assert(c);
2695         assert(id);
2696
2697         if (*rt)
2698                 return 1;
2699
2700         if (!c->private_network && !c->private_tmp)
2701                 return 0;
2702
2703         r = exec_runtime_allocate(rt);
2704         if (r < 0)
2705                 return r;
2706
2707         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2708                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2709                         return -errno;
2710         }
2711
2712         if (c->private_tmp && !(*rt)->tmp_dir) {
2713                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2714                 if (r < 0)
2715                         return r;
2716         }
2717
2718         return 1;
2719 }
2720
2721 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2722         assert(r);
2723         assert(r->n_ref > 0);
2724
2725         r->n_ref++;
2726         return r;
2727 }
2728
2729 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2730
2731         if (!r)
2732                 return NULL;
2733
2734         assert(r->n_ref > 0);
2735
2736         r->n_ref--;
2737         if (r->n_ref <= 0) {
2738                 free(r->tmp_dir);
2739                 free(r->var_tmp_dir);
2740                 safe_close_pair(r->netns_storage_socket);
2741                 free(r);
2742         }
2743
2744         return NULL;
2745 }
2746
2747 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2748         assert(u);
2749         assert(f);
2750         assert(fds);
2751
2752         if (!rt)
2753                 return 0;
2754
2755         if (rt->tmp_dir)
2756                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2757
2758         if (rt->var_tmp_dir)
2759                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2760
2761         if (rt->netns_storage_socket[0] >= 0) {
2762                 int copy;
2763
2764                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2765                 if (copy < 0)
2766                         return copy;
2767
2768                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2769         }
2770
2771         if (rt->netns_storage_socket[1] >= 0) {
2772                 int copy;
2773
2774                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2775                 if (copy < 0)
2776                         return copy;
2777
2778                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2779         }
2780
2781         return 0;
2782 }
2783
2784 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2785         int r;
2786
2787         assert(rt);
2788         assert(key);
2789         assert(value);
2790
2791         if (streq(key, "tmp-dir")) {
2792                 char *copy;
2793
2794                 r = exec_runtime_allocate(rt);
2795                 if (r < 0)
2796                         return r;
2797
2798                 copy = strdup(value);
2799                 if (!copy)
2800                         return log_oom();
2801
2802                 free((*rt)->tmp_dir);
2803                 (*rt)->tmp_dir = copy;
2804
2805         } else if (streq(key, "var-tmp-dir")) {
2806                 char *copy;
2807
2808                 r = exec_runtime_allocate(rt);
2809                 if (r < 0)
2810                         return r;
2811
2812                 copy = strdup(value);
2813                 if (!copy)
2814                         return log_oom();
2815
2816                 free((*rt)->var_tmp_dir);
2817                 (*rt)->var_tmp_dir = copy;
2818
2819         } else if (streq(key, "netns-socket-0")) {
2820                 int fd;
2821
2822                 r = exec_runtime_allocate(rt);
2823                 if (r < 0)
2824                         return r;
2825
2826                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2827                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2828                 else {
2829                         safe_close((*rt)->netns_storage_socket[0]);
2830                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2831                 }
2832         } else if (streq(key, "netns-socket-1")) {
2833                 int fd;
2834
2835                 r = exec_runtime_allocate(rt);
2836                 if (r < 0)
2837                         return r;
2838
2839                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2840                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2841                 else {
2842                         safe_close((*rt)->netns_storage_socket[1]);
2843                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2844                 }
2845         } else
2846                 return 0;
2847
2848         return 1;
2849 }
2850
2851 static void *remove_tmpdir_thread(void *p) {
2852         _cleanup_free_ char *path = p;
2853
2854         rm_rf_dangerous(path, false, true, false);
2855         return NULL;
2856 }
2857
2858 void exec_runtime_destroy(ExecRuntime *rt) {
2859         int r;
2860
2861         if (!rt)
2862                 return;
2863
2864         /* If there are multiple users of this, let's leave the stuff around */
2865         if (rt->n_ref > 1)
2866                 return;
2867
2868         if (rt->tmp_dir) {
2869                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2870
2871                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2872                 if (r < 0) {
2873                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2874                         free(rt->tmp_dir);
2875                 }
2876
2877                 rt->tmp_dir = NULL;
2878         }
2879
2880         if (rt->var_tmp_dir) {
2881                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2882
2883                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2884                 if (r < 0) {
2885                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2886                         free(rt->var_tmp_dir);
2887                 }
2888
2889                 rt->var_tmp_dir = NULL;
2890         }
2891
2892         safe_close_pair(rt->netns_storage_socket);
2893 }
2894
2895 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2896         [EXEC_INPUT_NULL] = "null",
2897         [EXEC_INPUT_TTY] = "tty",
2898         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2899         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2900         [EXEC_INPUT_SOCKET] = "socket"
2901 };
2902
2903 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2904
2905 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2906         [EXEC_OUTPUT_INHERIT] = "inherit",
2907         [EXEC_OUTPUT_NULL] = "null",
2908         [EXEC_OUTPUT_TTY] = "tty",
2909         [EXEC_OUTPUT_SYSLOG] = "syslog",
2910         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2911         [EXEC_OUTPUT_KMSG] = "kmsg",
2912         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2913         [EXEC_OUTPUT_JOURNAL] = "journal",
2914         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2915         [EXEC_OUTPUT_SOCKET] = "socket"
2916 };
2917
2918 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);