chiark / gitweb /
e683fa5e16e63a3f84a60130cc8cebd9889f980b
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
93
94 /* This assumes there is a 'tty' group */
95 #define TTY_MODE 0620
96
97 #define SNDBUF_SIZE (8*1024*1024)
98
/* Relocates the passed descriptors so that fds[i] ends up being descriptor
 * number i+3, i.e. the array occupies a contiguous range right after
 * stdin/stdout/stderr.
 *
 * The fds array is modified in place: every entry that had to be moved is
 * replaced by its duplicate and the previous descriptor is closed.
 *
 * Returns 0 on success, or a negative errno value if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep sweeping over the array until a full pass places every
         * descriptor at its final slot. */
        while (first >= 0) {
                int retry = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits at its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If
                         * the slot was still occupied we got something else,
                         * so note the first such index and retry from there
                         * on the next pass. */
                        if (dup_fd != target && retry < 0)
                                retry = idx;
                }

                first = retry;
        }

        return 0;
}
142
/* Prepares the passed fds for being handed to a child: O_NONBLOCK is set or
 * cleared according to 'nonblock', and FD_CLOEXEC is always cleared.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: the whole point of
                 * these fds is that they get passed on to our children. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171         assert(context);
172
173         if (context->tty_path)
174                 return context->tty_path;
175
176         return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180         assert(context);
181
182         if (context->tty_vhangup)
183                 terminal_vhangup(tty_path(context));
184
185         if (context->tty_reset)
186                 reset_terminal(tty_path(context));
187
188         if (context->tty_vt_disallocate && context->tty_path)
189                 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193         return
194                 o == EXEC_OUTPUT_TTY ||
195                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as descriptor number nfd.
 *
 * Returns nfd on success, or a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);

        return result;
}
217
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
219         int fd, r;
220         union sockaddr_union sa = {
221                 .un.sun_family = AF_UNIX,
222                 .un.sun_path = "/run/systemd/journal/stdout",
223         };
224
225         assert(context);
226         assert(output < _EXEC_OUTPUT_MAX);
227         assert(ident);
228         assert(nfd >= 0);
229
230         fd = socket(AF_UNIX, SOCK_STREAM, 0);
231         if (fd < 0)
232                 return -errno;
233
234         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
235         if (r < 0) {
236                 safe_close(fd);
237                 return -errno;
238         }
239
240         if (shutdown(fd, SHUT_RD) < 0) {
241                 safe_close(fd);
242                 return -errno;
243         }
244
245         fd_inc_sndbuf(fd, SNDBUF_SIZE);
246
247         dprintf(fd,
248                 "%s\n"
249                 "%s\n"
250                 "%i\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n",
255                 context->syslog_identifier ? context->syslog_identifier : ident,
256                 unit_id,
257                 context->syslog_priority,
258                 !!context->syslog_level_prefix,
259                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261                 is_terminal_output(output));
262
263         if (fd != nfd) {
264                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265                 safe_close(fd);
266         } else
267                 r = nfd;
268
269         return r;
270 }
/* Opens the terminal at 'path' via open_terminal() (with O_NOCTTY added to
 * 'mode') and installs it as descriptor number nfd.
 *
 * Returns nfd on success, or a negative value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(tty_fd);

        return result;
}
288
289 static bool is_terminal_input(ExecInput i) {
290         return
291                 i == EXEC_INPUT_TTY ||
292                 i == EXEC_INPUT_TTY_FORCE ||
293                 i == EXEC_INPUT_TTY_FAIL;
294 }
295
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
297
298         if (is_terminal_input(std_input) && !apply_tty_stdin)
299                 return EXEC_INPUT_NULL;
300
301         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302                 return EXEC_INPUT_NULL;
303
304         return std_input;
305 }
306
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
308
309         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310                 return EXEC_OUTPUT_INHERIT;
311
312         return std_output;
313 }
314
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
316         ExecInput i;
317
318         assert(context);
319
320         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
321
322         switch (i) {
323
324         case EXEC_INPUT_NULL:
325                 return open_null_as(O_RDONLY, STDIN_FILENO);
326
327         case EXEC_INPUT_TTY:
328         case EXEC_INPUT_TTY_FORCE:
329         case EXEC_INPUT_TTY_FAIL: {
330                 int fd, r;
331
332                 fd = acquire_terminal(tty_path(context),
333                                       i == EXEC_INPUT_TTY_FAIL,
334                                       i == EXEC_INPUT_TTY_FORCE,
335                                       false,
336                                       USEC_INFINITY);
337                 if (fd < 0)
338                         return fd;
339
340                 if (fd != STDIN_FILENO) {
341                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342                         safe_close(fd);
343                 } else
344                         r = STDIN_FILENO;
345
346                 return r;
347         }
348
349         case EXEC_INPUT_SOCKET:
350                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
351
352         default:
353                 assert_not_reached("Unknown input type");
354         }
355 }
356
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
358         ExecOutput o;
359         ExecInput i;
360         int r;
361
362         assert(context);
363         assert(ident);
364
365         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366         o = fixup_output(context->std_output, socket_fd);
367
368         if (fileno == STDERR_FILENO) {
369                 ExecOutput e;
370                 e = fixup_output(context->std_error, socket_fd);
371
372                 /* This expects the input and output are already set up */
373
374                 /* Don't change the stderr file descriptor if we inherit all
375                  * the way and are not on a tty */
376                 if (e == EXEC_OUTPUT_INHERIT &&
377                     o == EXEC_OUTPUT_INHERIT &&
378                     i == EXEC_INPUT_NULL &&
379                     !is_terminal_input(context->std_input) &&
380                     getppid () != 1)
381                         return fileno;
382
383                 /* Duplicate from stdout if possible */
384                 if (e == o || e == EXEC_OUTPUT_INHERIT)
385                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 o = e;
388
389         } else if (o == EXEC_OUTPUT_INHERIT) {
390                 /* If input got downgraded, inherit the original value */
391                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395                 if (i != EXEC_INPUT_NULL)
396                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
397
398                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
399                 if (getppid() != 1)
400                         return fileno;
401
402                 /* We need to open /dev/null here anew, to get the right access mode. */
403                 return open_null_as(O_WRONLY, fileno);
404         }
405
406         switch (o) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 r = connect_logger_as(context, o, ident, unit_id, fileno);
425                 if (r < 0) {
426                         log_struct_unit(LOG_CRIT, unit_id,
427                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428                                 fileno == STDOUT_FILENO ? "out" : "err",
429                                 unit_id, strerror(-r),
430                                 "ERRNO=%d", -r,
431                                 NULL);
432                         r = open_null_as(O_WRONLY, fileno);
433                 }
434                 return r;
435
436         case EXEC_OUTPUT_SOCKET:
437                 assert(socket_fd >= 0);
438                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
439
440         default:
441                 assert_not_reached("Unknown error type");
442         }
443 }
444
445 static int chown_terminal(int fd, uid_t uid) {
446         struct stat st;
447
448         assert(fd >= 0);
449
450         /* This might fail. What matters are the results. */
451         (void) fchown(fd, uid, -1);
452         (void) fchmod(fd, TTY_MODE);
453
454         if (fstat(fd, &st) < 0)
455                 return -errno;
456
457         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
458                 return -EPERM;
459
460         return 0;
461 }
462
463 static int setup_confirm_stdio(int *_saved_stdin,
464                                int *_saved_stdout) {
465         int fd = -1, saved_stdin, saved_stdout = -1, r;
466
467         assert(_saved_stdin);
468         assert(_saved_stdout);
469
470         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
471         if (saved_stdin < 0)
472                 return -errno;
473
474         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475         if (saved_stdout < 0) {
476                 r = errno;
477                 goto fail;
478         }
479
480         fd = acquire_terminal(
481                         "/dev/console",
482                         false,
483                         false,
484                         false,
485                         DEFAULT_CONFIRM_USEC);
486         if (fd < 0) {
487                 r = fd;
488                 goto fail;
489         }
490
491         r = chown_terminal(fd, getuid());
492         if (r < 0)
493                 goto fail;
494
495         if (dup2(fd, STDIN_FILENO) < 0) {
496                 r = -errno;
497                 goto fail;
498         }
499
500         if (dup2(fd, STDOUT_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (fd >= 2)
506                 safe_close(fd);
507
508         *_saved_stdin = saved_stdin;
509         *_saved_stdout = saved_stdout;
510
511         return 0;
512
513 fail:
514         safe_close(saved_stdout);
515         safe_close(saved_stdin);
516         safe_close(fd);
517
518         return r;
519 }
520
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522         _cleanup_close_ int fd = -1;
523         va_list ap;
524
525         assert(format);
526
527         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
528         if (fd < 0)
529                 return fd;
530
531         va_start(ap, format);
532         vdprintf(fd, format, ap);
533         va_end(ap);
534
535         return 0;
536 }
537
/* Releases the terminal and puts the saved stdin/stdout duplicates back in
 * place, closing the duplicates afterwards.
 *
 * Returns 0 on success, or the negative errno value of the last dup2() that
 * failed. Both duplicates are closed either way. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
561
562 static int ask_for_confirmation(char *response, char **argv) {
563         int saved_stdout = -1, saved_stdin = -1, r;
564         _cleanup_free_ char *line = NULL;
565
566         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
567         if (r < 0)
568                 return r;
569
570         line = exec_command_line(argv);
571         if (!line)
572                 return -ENOMEM;
573
574         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
575
576         restore_confirm_stdio(&saved_stdin, &saved_stdout);
577
578         return r;
579 }
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 _cleanup_cap_free_ cap_t d = NULL;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, while we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 d = cap_dup(context->capabilities);
688                 if (!d)
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
693                         return -errno;
694
695                 if (cap_set_proc(d) < 0)
696                         return -errno;
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the current process to "(<name>)", where <name> is the basename of
 * 'path' cut down to its last 8 characters, or to "(...)" if the basename is
 * empty. */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL. The result shall fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */
        char buf[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting part,
                 * as the beginning might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
938
939 #ifdef HAVE_SECCOMP
940
941 static int apply_seccomp(ExecContext *c) {
942         uint32_t negative_action, action;
943         scmp_filter_ctx *seccomp;
944         Iterator i;
945         void *id;
946         int r;
947
948         assert(c);
949
950         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
951
952         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
953         if (!seccomp)
954                 return -ENOMEM;
955
956         if (c->syscall_archs) {
957
958                 SET_FOREACH(id, c->syscall_archs, i) {
959                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
960                         if (r == -EEXIST)
961                                 continue;
962                         if (r < 0)
963                                 goto finish;
964                 }
965
966         } else {
967                 r = seccomp_add_secondary_archs(seccomp);
968                 if (r < 0)
969                         goto finish;
970         }
971
972         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
973         SET_FOREACH(id, c->syscall_filter, i) {
974                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
975                 if (r < 0)
976                         goto finish;
977         }
978
979         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
980         if (r < 0)
981                 goto finish;
982
983         r = seccomp_load(seccomp);
984
985 finish:
986         seccomp_release(seccomp);
987         return r;
988 }
989
990 static int apply_address_families(ExecContext *c) {
991         scmp_filter_ctx *seccomp;
992         Iterator i;
993         int r;
994
995         assert(c);
996
997         seccomp = seccomp_init(SCMP_ACT_ALLOW);
998         if (!seccomp)
999                 return -ENOMEM;
1000
1001         r = seccomp_add_secondary_archs(seccomp);
1002         if (r < 0)
1003                 goto finish;
1004
1005         if (c->address_families_whitelist) {
1006                 int af, first = 0, last = 0;
1007                 void *afp;
1008
1009                 /* If this is a whitelist, we first block the address
1010                  * families that are out of range and then everything
1011                  * that is not in the set. First, we find the lowest
1012                  * and highest address family in the set. */
1013
1014                 SET_FOREACH(afp, c->address_families, i) {
1015                         af = PTR_TO_INT(afp);
1016
1017                         if (af <= 0 || af >= af_max())
1018                                 continue;
1019
1020                         if (first == 0 || af < first)
1021                                 first = af;
1022
1023                         if (last == 0 || af > last)
1024                                 last = af;
1025                 }
1026
1027                 assert((first == 0) == (last == 0));
1028
1029                 if (first == 0) {
1030
1031                         /* No entries in the valid range, block everything */
1032                         r = seccomp_rule_add(
1033                                         seccomp,
1034                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1035                                         SCMP_SYS(socket),
1036                                         0);
1037                         if (r < 0)
1038                                 goto finish;
1039
1040                 } else {
1041
1042                         /* Block everything below the first entry */
1043                         r = seccomp_rule_add(
1044                                         seccomp,
1045                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1046                                         SCMP_SYS(socket),
1047                                         1,
1048                                         SCMP_A0(SCMP_CMP_LT, first));
1049                         if (r < 0)
1050                                 goto finish;
1051
1052                         /* Block everything above the last entry */
1053                         r = seccomp_rule_add(
1054                                         seccomp,
1055                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1056                                         SCMP_SYS(socket),
1057                                         1,
1058                                         SCMP_A0(SCMP_CMP_GT, last));
1059                         if (r < 0)
1060                                 goto finish;
1061
1062                         /* Block everything between the first and last
1063                          * entry */
1064                         for (af = 1; af < af_max(); af++) {
1065
1066                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1067                                         continue;
1068
1069                                 r = seccomp_rule_add(
1070                                                 seccomp,
1071                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1072                                                 SCMP_SYS(socket),
1073                                                 1,
1074                                                 SCMP_A0(SCMP_CMP_EQ, af));
1075                                 if (r < 0)
1076                                         goto finish;
1077                         }
1078                 }
1079
1080         } else {
1081                 void *af;
1082
1083                 /* If this is a blacklist, then generate one rule for
1084                  * each address family that are then combined in OR
1085                  * checks. */
1086
1087                 SET_FOREACH(af, c->address_families, i) {
1088
1089                         r = seccomp_rule_add(
1090                                         seccomp,
1091                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092                                         SCMP_SYS(socket),
1093                                         1,
1094                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1095                         if (r < 0)
1096                                 goto finish;
1097                 }
1098         }
1099
1100         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1101         if (r < 0)
1102                 goto finish;
1103
1104         r = seccomp_load(seccomp);
1105
1106 finish:
1107         seccomp_release(seccomp);
1108         return r;
1109 }
1110
1111 #endif
1112
1113 static void do_idle_pipe_dance(int idle_pipe[4]) {
1114         assert(idle_pipe);
1115
1116
1117         safe_close(idle_pipe[1]);
1118         safe_close(idle_pipe[2]);
1119
1120         if (idle_pipe[0] >= 0) {
1121                 int r;
1122
1123                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1124
1125                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1126                         /* Signal systemd that we are bored and want to continue. */
1127                         write(idle_pipe[3], "x", 1);
1128
1129                         /* Wait for systemd to react to the signal above. */
1130                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1131                 }
1132
1133                 safe_close(idle_pipe[0]);
1134
1135         }
1136
1137         safe_close(idle_pipe[3]);
1138 }
1139
1140 static int build_environment(
1141                 const ExecContext *c,
1142                 unsigned n_fds,
1143                 usec_t watchdog_usec,
1144                 const char *home,
1145                 const char *username,
1146                 const char *shell,
1147                 char ***ret) {
1148
1149         _cleanup_strv_free_ char **our_env = NULL;
1150         unsigned n_env = 0;
1151         char *x;
1152
1153         assert(c);
1154         assert(ret);
1155
1156         our_env = new0(char*, 10);
1157         if (!our_env)
1158                 return -ENOMEM;
1159
1160         if (n_fds > 0) {
1161                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1162                         return -ENOMEM;
1163                 our_env[n_env++] = x;
1164
1165                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1166                         return -ENOMEM;
1167                 our_env[n_env++] = x;
1168         }
1169
1170         if (watchdog_usec > 0) {
1171                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1172                         return -ENOMEM;
1173                 our_env[n_env++] = x;
1174
1175                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178         }
1179
1180         if (home) {
1181                 x = strappend("HOME=", home);
1182                 if (!x)
1183                         return -ENOMEM;
1184                 our_env[n_env++] = x;
1185         }
1186
1187         if (username) {
1188                 x = strappend("LOGNAME=", username);
1189                 if (!x)
1190                         return -ENOMEM;
1191                 our_env[n_env++] = x;
1192
1193                 x = strappend("USER=", username);
1194                 if (!x)
1195                         return -ENOMEM;
1196                 our_env[n_env++] = x;
1197         }
1198
1199         if (shell) {
1200                 x = strappend("SHELL=", shell);
1201                 if (!x)
1202                         return -ENOMEM;
1203                 our_env[n_env++] = x;
1204         }
1205
1206         if (is_terminal_input(c->std_input) ||
1207             c->std_output == EXEC_OUTPUT_TTY ||
1208             c->std_error == EXEC_OUTPUT_TTY ||
1209             c->tty_path) {
1210
1211                 x = strdup(default_term_for_tty(tty_path(c)));
1212                 if (!x)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215         }
1216
1217         our_env[n_env++] = NULL;
1218         assert(n_env <= 10);
1219
1220         *ret = our_env;
1221         our_env = NULL;
1222
1223         return 0;
1224 }
1225
1226 int exec_spawn(ExecCommand *command,
1227                const ExecContext *context,
1228                const ExecParameters *exec_params,
1229                ExecRuntime *runtime,
1230                pid_t *ret) {
1231
1232         _cleanup_strv_free_ char **files_env = NULL;
1233         int *fds = NULL; unsigned n_fds = 0;
1234         int socket_fd;
1235         char *line, **argv;
1236         pid_t pid;
1237         int r;
1238
1239         assert(command);
1240         assert(context);
1241         assert(ret);
1242         assert(exec_params);
1243         assert(exec_params->fds || exec_params->n_fds <= 0);
1244
1245         if (context->std_input == EXEC_INPUT_SOCKET ||
1246             context->std_output == EXEC_OUTPUT_SOCKET ||
1247             context->std_error == EXEC_OUTPUT_SOCKET) {
1248
1249                 if (exec_params->n_fds != 1)
1250                         return -EINVAL;
1251
1252                 socket_fd = exec_params->fds[0];
1253         } else {
1254                 socket_fd = -1;
1255                 fds = exec_params->fds;
1256                 n_fds = exec_params->n_fds;
1257         }
1258
1259         r = exec_context_load_environment(context, &files_env);
1260         if (r < 0) {
1261                 log_struct_unit(LOG_ERR,
1262                            exec_params->unit_id,
1263                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1264                            "ERRNO=%d", -r,
1265                            NULL);
1266                 return r;
1267         }
1268
1269         argv = exec_params->argv ?: command->argv;
1270
1271         line = exec_command_line(argv);
1272         if (!line)
1273                 return log_oom();
1274
1275         log_struct_unit(LOG_DEBUG,
1276                         exec_params->unit_id,
1277                         "EXECUTABLE=%s", command->path,
1278                         "MESSAGE=About to execute: %s", line,
1279                         NULL);
1280         free(line);
1281
1282         pid = fork();
1283         if (pid < 0)
1284                 return -errno;
1285
1286         if (pid == 0) {
1287                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1288                 const char *username = NULL, *home = NULL, *shell = NULL;
1289                 unsigned n_dont_close = 0;
1290                 int dont_close[n_fds + 3];
1291                 uid_t uid = (uid_t) -1;
1292                 gid_t gid = (gid_t) -1;
1293                 int i, err;
1294
1295                 /* child */
1296
1297                 rename_process_from_path(command->path);
1298
1299                 /* We reset exactly these signals, since they are the
1300                  * only ones we set to SIG_IGN in the main daemon. All
1301                  * others we leave untouched because we set them to
1302                  * SIG_DFL or a valid handler initially, both of which
1303                  * will be demoted to SIG_DFL. */
1304                 default_signals(SIGNALS_CRASH_HANDLER,
1305                                 SIGNALS_IGNORE, -1);
1306
1307                 if (context->ignore_sigpipe)
1308                         ignore_signals(SIGPIPE, -1);
1309
1310                 err = reset_signal_mask();
1311                 if (err < 0) {
1312                         r = EXIT_SIGNAL_MASK;
1313                         goto fail_child;
1314                 }
1315
1316                 if (exec_params->idle_pipe)
1317                         do_idle_pipe_dance(exec_params->idle_pipe);
1318
1319                 /* Close sockets very early to make sure we don't
1320                  * block init reexecution because it cannot bind its
1321                  * sockets */
1322                 log_forget_fds();
1323
1324                 if (socket_fd >= 0)
1325                         dont_close[n_dont_close++] = socket_fd;
1326                 if (n_fds > 0) {
1327                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1328                         n_dont_close += n_fds;
1329                 }
1330                 if (runtime) {
1331                         if (runtime->netns_storage_socket[0] >= 0)
1332                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1333                         if (runtime->netns_storage_socket[1] >= 0)
1334                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1335                 }
1336
1337                 err = close_all_fds(dont_close, n_dont_close);
1338                 if (err < 0) {
1339                         r = EXIT_FDS;
1340                         goto fail_child;
1341                 }
1342
1343                 if (!context->same_pgrp)
1344                         if (setsid() < 0) {
1345                                 err = -errno;
1346                                 r = EXIT_SETSID;
1347                                 goto fail_child;
1348                         }
1349
1350                 exec_context_tty_reset(context);
1351
1352                 if (exec_params->confirm_spawn) {
1353                         char response;
1354
1355                         err = ask_for_confirmation(&response, argv);
1356                         if (err == -ETIMEDOUT)
1357                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1358                         else if (err < 0)
1359                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1360                         else if (response == 's') {
1361                                 write_confirm_message("Skipping execution.\n");
1362                                 err = -ECANCELED;
1363                                 r = EXIT_CONFIRM;
1364                                 goto fail_child;
1365                         } else if (response == 'n') {
1366                                 write_confirm_message("Failing execution.\n");
1367                                 err = r = 0;
1368                                 goto fail_child;
1369                         }
1370                 }
1371
1372                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1373                  * must sure to drop O_NONBLOCK */
1374                 if (socket_fd >= 0)
1375                         fd_nonblock(socket_fd, false);
1376
1377                 err = setup_input(context, socket_fd, exec_params->apply_tty_stdin);
1378                 if (err < 0) {
1379                         r = EXIT_STDIN;
1380                         goto fail_child;
1381                 }
1382
1383                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), exec_params->unit_id, exec_params->apply_tty_stdin);
1384                 if (err < 0) {
1385                         r = EXIT_STDOUT;
1386                         goto fail_child;
1387                 }
1388
1389                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), exec_params->unit_id, exec_params->apply_tty_stdin);
1390                 if (err < 0) {
1391                         r = EXIT_STDERR;
1392                         goto fail_child;
1393                 }
1394
1395                 if (exec_params->cgroup_path) {
1396                         err = cg_attach_everywhere(exec_params->cgroup_supported, exec_params->cgroup_path, 0);
1397                         if (err < 0) {
1398                                 r = EXIT_CGROUP;
1399                                 goto fail_child;
1400                         }
1401                 }
1402
1403                 if (context->oom_score_adjust_set) {
1404                         char t[16];
1405
1406                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1407                         char_array_0(t);
1408
1409                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1410                                 err = -errno;
1411                                 r = EXIT_OOM_ADJUST;
1412                                 goto fail_child;
1413                         }
1414                 }
1415
1416                 if (context->nice_set)
1417                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1418                                 err = -errno;
1419                                 r = EXIT_NICE;
1420                                 goto fail_child;
1421                         }
1422
1423                 if (context->cpu_sched_set) {
1424                         struct sched_param param = {
1425                                 .sched_priority = context->cpu_sched_priority,
1426                         };
1427
1428                         r = sched_setscheduler(0,
1429                                                context->cpu_sched_policy |
1430                                                (context->cpu_sched_reset_on_fork ?
1431                                                 SCHED_RESET_ON_FORK : 0),
1432                                                &param);
1433                         if (r < 0) {
1434                                 err = -errno;
1435                                 r = EXIT_SETSCHEDULER;
1436                                 goto fail_child;
1437                         }
1438                 }
1439
1440                 if (context->cpuset)
1441                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1442                                 err = -errno;
1443                                 r = EXIT_CPUAFFINITY;
1444                                 goto fail_child;
1445                         }
1446
1447                 if (context->ioprio_set)
1448                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1449                                 err = -errno;
1450                                 r = EXIT_IOPRIO;
1451                                 goto fail_child;
1452                         }
1453
1454                 if (context->timer_slack_nsec != NSEC_INFINITY)
1455                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1456                                 err = -errno;
1457                                 r = EXIT_TIMERSLACK;
1458                                 goto fail_child;
1459                         }
1460
1461                 if (context->personality != 0xffffffffUL)
1462                         if (personality(context->personality) < 0) {
1463                                 err = -errno;
1464                                 r = EXIT_PERSONALITY;
1465                                 goto fail_child;
1466                         }
1467
1468                 if (context->utmp_id)
1469                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1470
1471                 if (context->user) {
1472                         username = context->user;
1473                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1474                         if (err < 0) {
1475                                 r = EXIT_USER;
1476                                 goto fail_child;
1477                         }
1478
1479                         if (is_terminal_input(context->std_input)) {
1480                                 err = chown_terminal(STDIN_FILENO, uid);
1481                                 if (err < 0) {
1482                                         r = EXIT_STDIN;
1483                                         goto fail_child;
1484                                 }
1485                         }
1486                 }
1487
1488 #ifdef HAVE_PAM
1489                 if (exec_params->cgroup_path && context->user && context->pam_name) {
1490                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, exec_params->cgroup_path, 0644, uid, gid);
1491                         if (err < 0) {
1492                                 r = EXIT_CGROUP;
1493                                 goto fail_child;
1494                         }
1495
1496
1497                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, exec_params->cgroup_path, 0755, uid, gid);
1498                         if (err < 0) {
1499                                 r = EXIT_CGROUP;
1500                                 goto fail_child;
1501                         }
1502                 }
1503 #endif
1504
1505                 if (!strv_isempty(context->runtime_directory) && exec_params->runtime_prefix) {
1506                         char **rt;
1507
1508                         STRV_FOREACH(rt, context->runtime_directory) {
1509                                 _cleanup_free_ char *p;
1510
1511                                 p = strjoin(exec_params->runtime_prefix, "/", *rt, NULL);
1512                                 if (!p) {
1513                                         r = EXIT_RUNTIME_DIRECTORY;
1514                                         err = -ENOMEM;
1515                                         goto fail_child;
1516                                 }
1517
1518                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1519                                 if (err < 0) {
1520                                         r = EXIT_RUNTIME_DIRECTORY;
1521                                         goto fail_child;
1522                                 }
1523                         }
1524                 }
1525
1526                 if (exec_params->apply_permissions) {
1527                         err = enforce_groups(context, username, gid);
1528                         if (err < 0) {
1529                                 r = EXIT_GROUP;
1530                                 goto fail_child;
1531                         }
1532                 }
1533
1534                 umask(context->umask);
1535
1536 #ifdef HAVE_PAM
1537                 if (exec_params->apply_permissions && context->pam_name && username) {
1538                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1539                         if (err < 0) {
1540                                 r = EXIT_PAM;
1541                                 goto fail_child;
1542                         }
1543                 }
1544 #endif
1545                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1546                         err = setup_netns(runtime->netns_storage_socket);
1547                         if (err < 0) {
1548                                 r = EXIT_NETWORK;
1549                                 goto fail_child;
1550                         }
1551                 }
1552
1553                 if (!strv_isempty(context->read_write_dirs) ||
1554                     !strv_isempty(context->read_only_dirs) ||
1555                     !strv_isempty(context->inaccessible_dirs) ||
1556                     context->mount_flags != 0 ||
1557                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1558                     context->private_devices ||
1559                     context->protect_system != PROTECT_SYSTEM_NO ||
1560                     context->protect_home != PROTECT_HOME_NO) {
1561
1562                         char *tmp = NULL, *var = NULL;
1563
1564                         /* The runtime struct only contains the parent
1565                          * of the private /tmp, which is
1566                          * non-accessible to world users. Inside of it
1567                          * there's a /tmp that is sticky, and that's
1568                          * the one we want to use here. */
1569
1570                         if (context->private_tmp && runtime) {
1571                                 if (runtime->tmp_dir)
1572                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1573                                 if (runtime->var_tmp_dir)
1574                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1575                         }
1576
1577                         err = setup_namespace(
1578                                         context->read_write_dirs,
1579                                         context->read_only_dirs,
1580                                         context->inaccessible_dirs,
1581                                         tmp,
1582                                         var,
1583                                         context->private_devices,
1584                                         context->protect_home,
1585                                         context->protect_system,
1586                                         context->mount_flags);
1587                         if (err < 0) {
1588                                 r = EXIT_NAMESPACE;
1589                                 goto fail_child;
1590                         }
1591                 }
1592
1593                 if (exec_params->apply_chroot) {
1594                         if (context->root_directory)
1595                                 if (chroot(context->root_directory) < 0) {
1596                                         err = -errno;
1597                                         r = EXIT_CHROOT;
1598                                         goto fail_child;
1599                                 }
1600
1601                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1602                                 err = -errno;
1603                                 r = EXIT_CHDIR;
1604                                 goto fail_child;
1605                         }
1606                 } else {
1607                         _cleanup_free_ char *d = NULL;
1608
1609                         if (asprintf(&d, "%s/%s",
1610                                      context->root_directory ? context->root_directory : "",
1611                                      context->working_directory ? context->working_directory : "") < 0) {
1612                                 err = -ENOMEM;
1613                                 r = EXIT_MEMORY;
1614                                 goto fail_child;
1615                         }
1616
1617                         if (chdir(d) < 0) {
1618                                 err = -errno;
1619                                 r = EXIT_CHDIR;
1620                                 goto fail_child;
1621                         }
1622                 }
1623
1624                 /* We repeat the fd closing here, to make sure that
1625                  * nothing is leaked from the PAM modules. Note that
1626                  * we are more aggressive this time since socket_fd
1627                  * and the netns fds we don#t need anymore. */
1628                 err = close_all_fds(fds, n_fds);
1629                 if (err >= 0)
1630                         err = shift_fds(fds, n_fds);
1631                 if (err >= 0)
1632                         err = flags_fds(fds, n_fds, context->non_blocking);
1633                 if (err < 0) {
1634                         r = EXIT_FDS;
1635                         goto fail_child;
1636                 }
1637
1638                 if (exec_params->apply_permissions) {
1639
1640                         for (i = 0; i < _RLIMIT_MAX; i++) {
1641                                 if (!context->rlimit[i])
1642                                         continue;
1643
1644                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1645                                         err = -errno;
1646                                         r = EXIT_LIMITS;
1647                                         goto fail_child;
1648                                 }
1649                         }
1650
1651                         if (context->capability_bounding_set_drop) {
1652                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1653                                 if (err < 0) {
1654                                         r = EXIT_CAPABILITIES;
1655                                         goto fail_child;
1656                                 }
1657                         }
1658
1659                         if (context->user) {
1660                                 err = enforce_user(context, uid);
1661                                 if (err < 0) {
1662                                         r = EXIT_USER;
1663                                         goto fail_child;
1664                                 }
1665                         }
1666
1667                         /* PR_GET_SECUREBITS is not privileged, while
1668                          * PR_SET_SECUREBITS is. So to suppress
1669                          * potential EPERMs we'll try not to call
1670                          * PR_SET_SECUREBITS unless necessary. */
1671                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1672                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1673                                         err = -errno;
1674                                         r = EXIT_SECUREBITS;
1675                                         goto fail_child;
1676                                 }
1677
1678                         if (context->capabilities)
1679                                 if (cap_set_proc(context->capabilities) < 0) {
1680                                         err = -errno;
1681                                         r = EXIT_CAPABILITIES;
1682                                         goto fail_child;
1683                                 }
1684
1685                         if (context->no_new_privileges)
1686                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1687                                         err = -errno;
1688                                         r = EXIT_NO_NEW_PRIVILEGES;
1689                                         goto fail_child;
1690                                 }
1691
1692 #ifdef HAVE_SECCOMP
1693                         if (context->address_families_whitelist ||
1694                             !set_isempty(context->address_families)) {
1695                                 err = apply_address_families(context);
1696                                 if (err < 0) {
1697                                         r = EXIT_ADDRESS_FAMILIES;
1698                                         goto fail_child;
1699                                 }
1700                         }
1701
1702                         if (context->syscall_whitelist ||
1703                             !set_isempty(context->syscall_filter) ||
1704                             !set_isempty(context->syscall_archs)) {
1705                                 err = apply_seccomp(context);
1706                                 if (err < 0) {
1707                                         r = EXIT_SECCOMP;
1708                                         goto fail_child;
1709                                 }
1710                         }
1711 #endif
1712
1713 #ifdef HAVE_SELINUX
1714                         if (context->selinux_context && use_selinux()) {
1715                                 err = setexeccon(context->selinux_context);
1716                                 if (err < 0 && !context->selinux_context_ignore) {
1717                                         r = EXIT_SELINUX_CONTEXT;
1718                                         goto fail_child;
1719                                 }
1720                         }
1721 #endif
1722
1723 #ifdef HAVE_APPARMOR
1724                         if (context->apparmor_profile && use_apparmor()) {
1725                                 err = aa_change_onexec(context->apparmor_profile);
1726                                 if (err < 0 && !context->apparmor_profile_ignore) {
1727                                         r = EXIT_APPARMOR_PROFILE;
1728                                         goto fail_child;
1729                                 }
1730                         }
1731 #endif
1732                 }
1733
1734                 err = build_environment(context, n_fds, exec_params->watchdog_usec, home, username, shell, &our_env);
1735                 if (r < 0) {
1736                         r = EXIT_MEMORY;
1737                         goto fail_child;
1738                 }
1739
1740                 final_env = strv_env_merge(5,
1741                                            exec_params->environment,
1742                                            our_env,
1743                                            context->environment,
1744                                            files_env,
1745                                            pam_env,
1746                                            NULL);
1747                 if (!final_env) {
1748                         err = -ENOMEM;
1749                         r = EXIT_MEMORY;
1750                         goto fail_child;
1751                 }
1752
1753                 final_argv = replace_env_argv(argv, final_env);
1754                 if (!final_argv) {
1755                         err = -ENOMEM;
1756                         r = EXIT_MEMORY;
1757                         goto fail_child;
1758                 }
1759
1760                 final_env = strv_env_clean(final_env);
1761
1762                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1763                         line = exec_command_line(final_argv);
1764                         if (line) {
1765                                 log_open();
1766                                 log_struct_unit(LOG_DEBUG,
1767                                                 exec_params->unit_id,
1768                                                 "EXECUTABLE=%s", command->path,
1769                                                 "MESSAGE=Executing: %s", line,
1770                                                 NULL);
1771                                 log_close();
1772                                 free(line);
1773                                 line = NULL;
1774                         }
1775                 }
1776                 execve(command->path, final_argv, final_env);
1777                 err = -errno;
1778                 r = EXIT_EXEC;
1779
1780         fail_child:
1781                 if (r != 0) {
1782                         log_open();
1783                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1784                                    "EXECUTABLE=%s", command->path,
1785                                    "MESSAGE=Failed at step %s spawning %s: %s",
1786                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1787                                           command->path, strerror(-err),
1788                                    "ERRNO=%d", -err,
1789                                    NULL);
1790                         log_close();
1791                 }
1792
1793                 _exit(r);
1794         }
1795
1796         log_struct_unit(LOG_DEBUG,
1797                         exec_params->unit_id,
1798                         "MESSAGE=Forked %s as "PID_FMT,
1799                         command->path, pid,
1800                         NULL);
1801
1802         /* We add the new process to the cgroup both in the child (so
1803          * that we can be sure that no user code is ever executed
1804          * outside of the cgroup) and in the parent (so that we can be
1805          * sure that when we kill the cgroup the process will be
1806          * killed too). */
1807         if (exec_params->cgroup_path)
1808                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, exec_params->cgroup_path, pid);
1809
1810         exec_status_start(&command->exec_status, pid);
1811
1812         *ret = pid;
1813         return 0;
1814 }
1815
1816 void exec_context_init(ExecContext *c) {
1817         assert(c);
1818
1819         c->umask = 0022;
1820         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1821         c->cpu_sched_policy = SCHED_OTHER;
1822         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1823         c->syslog_level_prefix = true;
1824         c->ignore_sigpipe = true;
1825         c->timer_slack_nsec = NSEC_INFINITY;
1826         c->personality = 0xffffffffUL;
1827         c->runtime_directory_mode = 0755;
1828 }
1829
1830 void exec_context_done(ExecContext *c) {
1831         unsigned l;
1832
1833         assert(c);
1834
1835         strv_free(c->environment);
1836         c->environment = NULL;
1837
1838         strv_free(c->environment_files);
1839         c->environment_files = NULL;
1840
1841         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1842                 free(c->rlimit[l]);
1843                 c->rlimit[l] = NULL;
1844         }
1845
1846         free(c->working_directory);
1847         c->working_directory = NULL;
1848         free(c->root_directory);
1849         c->root_directory = NULL;
1850
1851         free(c->tty_path);
1852         c->tty_path = NULL;
1853
1854         free(c->syslog_identifier);
1855         c->syslog_identifier = NULL;
1856
1857         free(c->user);
1858         c->user = NULL;
1859
1860         free(c->group);
1861         c->group = NULL;
1862
1863         strv_free(c->supplementary_groups);
1864         c->supplementary_groups = NULL;
1865
1866         free(c->pam_name);
1867         c->pam_name = NULL;
1868
1869         if (c->capabilities) {
1870                 cap_free(c->capabilities);
1871                 c->capabilities = NULL;
1872         }
1873
1874         strv_free(c->read_only_dirs);
1875         c->read_only_dirs = NULL;
1876
1877         strv_free(c->read_write_dirs);
1878         c->read_write_dirs = NULL;
1879
1880         strv_free(c->inaccessible_dirs);
1881         c->inaccessible_dirs = NULL;
1882
1883         if (c->cpuset)
1884                 CPU_FREE(c->cpuset);
1885
1886         free(c->utmp_id);
1887         c->utmp_id = NULL;
1888
1889         free(c->selinux_context);
1890         c->selinux_context = NULL;
1891
1892         free(c->apparmor_profile);
1893         c->apparmor_profile = NULL;
1894
1895         set_free(c->syscall_filter);
1896         c->syscall_filter = NULL;
1897
1898         set_free(c->syscall_archs);
1899         c->syscall_archs = NULL;
1900
1901         set_free(c->address_families);
1902         c->address_families = NULL;
1903
1904         strv_free(c->runtime_directory);
1905         c->runtime_directory = NULL;
1906 }
1907
1908 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1909         char **i;
1910
1911         assert(c);
1912
1913         if (!runtime_prefix)
1914                 return 0;
1915
1916         STRV_FOREACH(i, c->runtime_directory) {
1917                 _cleanup_free_ char *p;
1918
1919                 p = strjoin(runtime_prefix, "/", *i, NULL);
1920                 if (!p)
1921                         return -ENOMEM;
1922
1923                 /* We execute this synchronously, since we need to be
1924                  * sure this is gone when we start the service
1925                  * next. */
1926                 rm_rf_dangerous(p, false, true, false);
1927         }
1928
1929         return 0;
1930 }
1931
1932 void exec_command_done(ExecCommand *c) {
1933         assert(c);
1934
1935         free(c->path);
1936         c->path = NULL;
1937
1938         strv_free(c->argv);
1939         c->argv = NULL;
1940 }
1941
1942 void exec_command_done_array(ExecCommand *c, unsigned n) {
1943         unsigned i;
1944
1945         for (i = 0; i < n; i++)
1946                 exec_command_done(c+i);
1947 }
1948
1949 void exec_command_free_list(ExecCommand *c) {
1950         ExecCommand *i;
1951
1952         while ((i = c)) {
1953                 LIST_REMOVE(command, c, i);
1954                 exec_command_done(i);
1955                 free(i);
1956         }
1957 }
1958
1959 void exec_command_free_array(ExecCommand **c, unsigned n) {
1960         unsigned i;
1961
1962         for (i = 0; i < n; i++) {
1963                 exec_command_free_list(c[i]);
1964                 c[i] = NULL;
1965         }
1966 }
1967
1968 int exec_context_load_environment(const ExecContext *c, char ***l) {
1969         char **i, **r = NULL;
1970
1971         assert(c);
1972         assert(l);
1973
1974         STRV_FOREACH(i, c->environment_files) {
1975                 char *fn;
1976                 int k;
1977                 bool ignore = false;
1978                 char **p;
1979                 _cleanup_globfree_ glob_t pglob = {};
1980                 int count, n;
1981
1982                 fn = *i;
1983
1984                 if (fn[0] == '-') {
1985                         ignore = true;
1986                         fn ++;
1987                 }
1988
1989                 if (!path_is_absolute(fn)) {
1990                         if (ignore)
1991                                 continue;
1992
1993                         strv_free(r);
1994                         return -EINVAL;
1995                 }
1996
1997                 /* Filename supports globbing, take all matching files */
1998                 errno = 0;
1999                 if (glob(fn, 0, NULL, &pglob) != 0) {
2000                         if (ignore)
2001                                 continue;
2002
2003                         strv_free(r);
2004                         return errno ? -errno : -EINVAL;
2005                 }
2006                 count = pglob.gl_pathc;
2007                 if (count == 0) {
2008                         if (ignore)
2009                                 continue;
2010
2011                         strv_free(r);
2012                         return -EINVAL;
2013                 }
2014                 for (n = 0; n < count; n++) {
2015                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2016                         if (k < 0) {
2017                                 if (ignore)
2018                                         continue;
2019
2020                                 strv_free(r);
2021                                 return k;
2022                         }
2023                         /* Log invalid environment variables with filename */
2024                         if (p)
2025                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2026
2027                         if (r == NULL)
2028                                 r = p;
2029                         else {
2030                                 char **m;
2031
2032                                 m = strv_env_merge(2, r, p);
2033                                 strv_free(r);
2034                                 strv_free(p);
2035                                 if (!m)
2036                                         return -ENOMEM;
2037
2038                                 r = m;
2039                         }
2040                 }
2041         }
2042
2043         *l = r;
2044
2045         return 0;
2046 }
2047
2048 static bool tty_may_match_dev_console(const char *tty) {
2049         _cleanup_free_ char *active = NULL;
2050        char *console;
2051
2052         if (startswith(tty, "/dev/"))
2053                 tty += 5;
2054
2055         /* trivial identity? */
2056         if (streq(tty, "console"))
2057                 return true;
2058
2059         console = resolve_dev_console(&active);
2060         /* if we could not resolve, assume it may */
2061         if (!console)
2062                 return true;
2063
2064         /* "tty0" means the active VC, so it may be the same sometimes */
2065         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2066 }
2067
2068 bool exec_context_may_touch_console(ExecContext *ec) {
2069         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2070                 is_terminal_input(ec->std_input) ||
2071                 is_terminal_output(ec->std_output) ||
2072                 is_terminal_output(ec->std_error)) &&
2073                tty_may_match_dev_console(tty_path(ec));
2074 }
2075
2076 static void strv_fprintf(FILE *f, char **l) {
2077         char **g;
2078
2079         assert(f);
2080
2081         STRV_FOREACH(g, l)
2082                 fprintf(f, " %s", *g);
2083 }
2084
2085 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2086         char **e;
2087         unsigned i;
2088
2089         assert(c);
2090         assert(f);
2091
2092         prefix = strempty(prefix);
2093
2094         fprintf(f,
2095                 "%sUMask: %04o\n"
2096                 "%sWorkingDirectory: %s\n"
2097                 "%sRootDirectory: %s\n"
2098                 "%sNonBlocking: %s\n"
2099                 "%sPrivateTmp: %s\n"
2100                 "%sPrivateNetwork: %s\n"
2101                 "%sPrivateDevices: %s\n"
2102                 "%sProtectHome: %s\n"
2103                 "%sProtectSystem: %s\n"
2104                 "%sIgnoreSIGPIPE: %s\n",
2105                 prefix, c->umask,
2106                 prefix, c->working_directory ? c->working_directory : "/",
2107                 prefix, c->root_directory ? c->root_directory : "/",
2108                 prefix, yes_no(c->non_blocking),
2109                 prefix, yes_no(c->private_tmp),
2110                 prefix, yes_no(c->private_network),
2111                 prefix, yes_no(c->private_devices),
2112                 prefix, protect_home_to_string(c->protect_home),
2113                 prefix, protect_system_to_string(c->protect_system),
2114                 prefix, yes_no(c->ignore_sigpipe));
2115
2116         STRV_FOREACH(e, c->environment)
2117                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2118
2119         STRV_FOREACH(e, c->environment_files)
2120                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2121
2122         if (c->nice_set)
2123                 fprintf(f,
2124                         "%sNice: %i\n",
2125                         prefix, c->nice);
2126
2127         if (c->oom_score_adjust_set)
2128                 fprintf(f,
2129                         "%sOOMScoreAdjust: %i\n",
2130                         prefix, c->oom_score_adjust);
2131
2132         for (i = 0; i < RLIM_NLIMITS; i++)
2133                 if (c->rlimit[i])
2134                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2135                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2136
2137         if (c->ioprio_set) {
2138                 _cleanup_free_ char *class_str = NULL;
2139
2140                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2141                 fprintf(f,
2142                         "%sIOSchedulingClass: %s\n"
2143                         "%sIOPriority: %i\n",
2144                         prefix, strna(class_str),
2145                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2146         }
2147
2148         if (c->cpu_sched_set) {
2149                 _cleanup_free_ char *policy_str = NULL;
2150
2151                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2152                 fprintf(f,
2153                         "%sCPUSchedulingPolicy: %s\n"
2154                         "%sCPUSchedulingPriority: %i\n"
2155                         "%sCPUSchedulingResetOnFork: %s\n",
2156                         prefix, strna(policy_str),
2157                         prefix, c->cpu_sched_priority,
2158                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2159         }
2160
2161         if (c->cpuset) {
2162                 fprintf(f, "%sCPUAffinity:", prefix);
2163                 for (i = 0; i < c->cpuset_ncpus; i++)
2164                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2165                                 fprintf(f, " %u", i);
2166                 fputs("\n", f);
2167         }
2168
2169         if (c->timer_slack_nsec != NSEC_INFINITY)
2170                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2171
2172         fprintf(f,
2173                 "%sStandardInput: %s\n"
2174                 "%sStandardOutput: %s\n"
2175                 "%sStandardError: %s\n",
2176                 prefix, exec_input_to_string(c->std_input),
2177                 prefix, exec_output_to_string(c->std_output),
2178                 prefix, exec_output_to_string(c->std_error));
2179
2180         if (c->tty_path)
2181                 fprintf(f,
2182                         "%sTTYPath: %s\n"
2183                         "%sTTYReset: %s\n"
2184                         "%sTTYVHangup: %s\n"
2185                         "%sTTYVTDisallocate: %s\n",
2186                         prefix, c->tty_path,
2187                         prefix, yes_no(c->tty_reset),
2188                         prefix, yes_no(c->tty_vhangup),
2189                         prefix, yes_no(c->tty_vt_disallocate));
2190
2191         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2192             c->std_output == EXEC_OUTPUT_KMSG ||
2193             c->std_output == EXEC_OUTPUT_JOURNAL ||
2194             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2195             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2196             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2197             c->std_error == EXEC_OUTPUT_SYSLOG ||
2198             c->std_error == EXEC_OUTPUT_KMSG ||
2199             c->std_error == EXEC_OUTPUT_JOURNAL ||
2200             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2201             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2202             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2203
2204                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2205
2206                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2207                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2208
2209                 fprintf(f,
2210                         "%sSyslogFacility: %s\n"
2211                         "%sSyslogLevel: %s\n",
2212                         prefix, strna(fac_str),
2213                         prefix, strna(lvl_str));
2214         }
2215
2216         if (c->capabilities) {
2217                 _cleanup_cap_free_charp_ char *t;
2218
2219                 t = cap_to_text(c->capabilities, NULL);
2220                 if (t)
2221                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2222         }
2223
2224         if (c->secure_bits)
2225                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2226                         prefix,
2227                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2228                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2229                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2230                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2231                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2232                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2233
2234         if (c->capability_bounding_set_drop) {
2235                 unsigned long l;
2236                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2237
2238                 for (l = 0; l <= cap_last_cap(); l++)
2239                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2240                                 _cleanup_cap_free_charp_ char *t;
2241
2242                                 t = cap_to_name(l);
2243                                 if (t)
2244                                         fprintf(f, " %s", t);
2245                         }
2246
2247                 fputs("\n", f);
2248         }
2249
2250         if (c->user)
2251                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2252         if (c->group)
2253                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2254
2255         if (strv_length(c->supplementary_groups) > 0) {
2256                 fprintf(f, "%sSupplementaryGroups:", prefix);
2257                 strv_fprintf(f, c->supplementary_groups);
2258                 fputs("\n", f);
2259         }
2260
2261         if (c->pam_name)
2262                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2263
2264         if (strv_length(c->read_write_dirs) > 0) {
2265                 fprintf(f, "%sReadWriteDirs:", prefix);
2266                 strv_fprintf(f, c->read_write_dirs);
2267                 fputs("\n", f);
2268         }
2269
2270         if (strv_length(c->read_only_dirs) > 0) {
2271                 fprintf(f, "%sReadOnlyDirs:", prefix);
2272                 strv_fprintf(f, c->read_only_dirs);
2273                 fputs("\n", f);
2274         }
2275
2276         if (strv_length(c->inaccessible_dirs) > 0) {
2277                 fprintf(f, "%sInaccessibleDirs:", prefix);
2278                 strv_fprintf(f, c->inaccessible_dirs);
2279                 fputs("\n", f);
2280         }
2281
2282         if (c->utmp_id)
2283                 fprintf(f,
2284                         "%sUtmpIdentifier: %s\n",
2285                         prefix, c->utmp_id);
2286
2287         if (c->selinux_context)
2288                 fprintf(f,
2289                         "%sSELinuxContext: %s%s\n",
2290                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2291
2292         if (c->personality != 0xffffffffUL)
2293                 fprintf(f,
2294                         "%sPersonality: %s\n",
2295                         prefix, strna(personality_to_string(c->personality)));
2296
2297         if (c->syscall_filter) {
2298 #ifdef HAVE_SECCOMP
2299                 Iterator j;
2300                 void *id;
2301                 bool first = true;
2302 #endif
2303
2304                 fprintf(f,
2305                         "%sSystemCallFilter: ",
2306                         prefix);
2307
2308                 if (!c->syscall_whitelist)
2309                         fputc('~', f);
2310
2311 #ifdef HAVE_SECCOMP
2312                 SET_FOREACH(id, c->syscall_filter, j) {
2313                         _cleanup_free_ char *name = NULL;
2314
2315                         if (first)
2316                                 first = false;
2317                         else
2318                                 fputc(' ', f);
2319
2320                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2321                         fputs(strna(name), f);
2322                 }
2323 #endif
2324
2325                 fputc('\n', f);
2326         }
2327
2328         if (c->syscall_archs) {
2329 #ifdef HAVE_SECCOMP
2330                 Iterator j;
2331                 void *id;
2332 #endif
2333
2334                 fprintf(f,
2335                         "%sSystemCallArchitectures:",
2336                         prefix);
2337
2338 #ifdef HAVE_SECCOMP
2339                 SET_FOREACH(id, c->syscall_archs, j)
2340                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2341 #endif
2342                 fputc('\n', f);
2343         }
2344
2345         if (c->syscall_errno != 0)
2346                 fprintf(f,
2347                         "%sSystemCallErrorNumber: %s\n",
2348                         prefix, strna(errno_to_name(c->syscall_errno)));
2349
2350         if (c->apparmor_profile)
2351                 fprintf(f,
2352                         "%sAppArmorProfile: %s%s\n",
2353                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2354 }
2355
2356 void exec_status_start(ExecStatus *s, pid_t pid) {
2357         assert(s);
2358
2359         zero(*s);
2360         s->pid = pid;
2361         dual_timestamp_get(&s->start_timestamp);
2362 }
2363
2364 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2365         assert(s);
2366
2367         if (s->pid && s->pid != pid)
2368                 zero(*s);
2369
2370         s->pid = pid;
2371         dual_timestamp_get(&s->exit_timestamp);
2372
2373         s->code = code;
2374         s->status = status;
2375
2376         if (context) {
2377                 if (context->utmp_id)
2378                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2379
2380                 exec_context_tty_reset(context);
2381         }
2382 }
2383
2384 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2385         char buf[FORMAT_TIMESTAMP_MAX];
2386
2387         assert(s);
2388         assert(f);
2389
2390         if (s->pid <= 0)
2391                 return;
2392
2393         prefix = strempty(prefix);
2394
2395         fprintf(f,
2396                 "%sPID: "PID_FMT"\n",
2397                 prefix, s->pid);
2398
2399         if (s->start_timestamp.realtime > 0)
2400                 fprintf(f,
2401                         "%sStart Timestamp: %s\n",
2402                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2403
2404         if (s->exit_timestamp.realtime > 0)
2405                 fprintf(f,
2406                         "%sExit Timestamp: %s\n"
2407                         "%sExit Code: %s\n"
2408                         "%sExit Status: %i\n",
2409                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2410                         prefix, sigchld_code_to_string(s->code),
2411                         prefix, s->status);
2412 }
2413
2414 char *exec_command_line(char **argv) {
2415         size_t k;
2416         char *n, *p, **a;
2417         bool first = true;
2418
2419         assert(argv);
2420
2421         k = 1;
2422         STRV_FOREACH(a, argv)
2423                 k += strlen(*a)+3;
2424
2425         if (!(n = new(char, k)))
2426                 return NULL;
2427
2428         p = n;
2429         STRV_FOREACH(a, argv) {
2430
2431                 if (!first)
2432                         *(p++) = ' ';
2433                 else
2434                         first = false;
2435
2436                 if (strpbrk(*a, WHITESPACE)) {
2437                         *(p++) = '\'';
2438                         p = stpcpy(p, *a);
2439                         *(p++) = '\'';
2440                 } else
2441                         p = stpcpy(p, *a);
2442
2443         }
2444
2445         *p = 0;
2446
2447         /* FIXME: this doesn't really handle arguments that have
2448          * spaces and ticks in them */
2449
2450         return n;
2451 }
2452
2453 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2454         _cleanup_free_ char *cmd = NULL;
2455         const char *prefix2;
2456
2457         assert(c);
2458         assert(f);
2459
2460         prefix = strempty(prefix);
2461         prefix2 = strappenda(prefix, "\t");
2462
2463         cmd = exec_command_line(c->argv);
2464         fprintf(f,
2465                 "%sCommand Line: %s\n",
2466                 prefix, cmd ? cmd : strerror(ENOMEM));
2467
2468         exec_status_dump(&c->exec_status, f, prefix2);
2469 }
2470
2471 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2472         assert(f);
2473
2474         prefix = strempty(prefix);
2475
2476         LIST_FOREACH(command, c, c)
2477                 exec_command_dump(c, f, prefix);
2478 }
2479
2480 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2481         ExecCommand *end;
2482
2483         assert(l);
2484         assert(e);
2485
2486         if (*l) {
2487                 /* It's kind of important, that we keep the order here */
2488                 LIST_FIND_TAIL(command, *l, end);
2489                 LIST_INSERT_AFTER(command, *l, end, e);
2490         } else
2491               *l = e;
2492 }
2493
2494 int exec_command_set(ExecCommand *c, const char *path, ...) {
2495         va_list ap;
2496         char **l, *p;
2497
2498         assert(c);
2499         assert(path);
2500
2501         va_start(ap, path);
2502         l = strv_new_ap(path, ap);
2503         va_end(ap);
2504
2505         if (!l)
2506                 return -ENOMEM;
2507
2508         p = strdup(path);
2509         if (!p) {
2510                 strv_free(l);
2511                 return -ENOMEM;
2512         }
2513
2514         free(c->path);
2515         c->path = p;
2516
2517         strv_free(c->argv);
2518         c->argv = l;
2519
2520         return 0;
2521 }
2522
2523 static int exec_runtime_allocate(ExecRuntime **rt) {
2524
2525         if (*rt)
2526                 return 0;
2527
2528         *rt = new0(ExecRuntime, 1);
2529         if (!*rt)
2530                 return -ENOMEM;
2531
2532         (*rt)->n_ref = 1;
2533         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2534
2535         return 0;
2536 }
2537
2538 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2539         int r;
2540
2541         assert(rt);
2542         assert(c);
2543         assert(id);
2544
2545         if (*rt)
2546                 return 1;
2547
2548         if (!c->private_network && !c->private_tmp)
2549                 return 0;
2550
2551         r = exec_runtime_allocate(rt);
2552         if (r < 0)
2553                 return r;
2554
2555         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2556                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2557                         return -errno;
2558         }
2559
2560         if (c->private_tmp && !(*rt)->tmp_dir) {
2561                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2562                 if (r < 0)
2563                         return r;
2564         }
2565
2566         return 1;
2567 }
2568
2569 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2570         assert(r);
2571         assert(r->n_ref > 0);
2572
2573         r->n_ref++;
2574         return r;
2575 }
2576
2577 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2578
2579         if (!r)
2580                 return NULL;
2581
2582         assert(r->n_ref > 0);
2583
2584         r->n_ref--;
2585         if (r->n_ref <= 0) {
2586                 free(r->tmp_dir);
2587                 free(r->var_tmp_dir);
2588                 safe_close_pair(r->netns_storage_socket);
2589                 free(r);
2590         }
2591
2592         return NULL;
2593 }
2594
2595 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2596         assert(u);
2597         assert(f);
2598         assert(fds);
2599
2600         if (!rt)
2601                 return 0;
2602
2603         if (rt->tmp_dir)
2604                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2605
2606         if (rt->var_tmp_dir)
2607                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2608
2609         if (rt->netns_storage_socket[0] >= 0) {
2610                 int copy;
2611
2612                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2613                 if (copy < 0)
2614                         return copy;
2615
2616                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2617         }
2618
2619         if (rt->netns_storage_socket[1] >= 0) {
2620                 int copy;
2621
2622                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2623                 if (copy < 0)
2624                         return copy;
2625
2626                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2627         }
2628
2629         return 0;
2630 }
2631
2632 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2633         int r;
2634
2635         assert(rt);
2636         assert(key);
2637         assert(value);
2638
2639         if (streq(key, "tmp-dir")) {
2640                 char *copy;
2641
2642                 r = exec_runtime_allocate(rt);
2643                 if (r < 0)
2644                         return r;
2645
2646                 copy = strdup(value);
2647                 if (!copy)
2648                         return log_oom();
2649
2650                 free((*rt)->tmp_dir);
2651                 (*rt)->tmp_dir = copy;
2652
2653         } else if (streq(key, "var-tmp-dir")) {
2654                 char *copy;
2655
2656                 r = exec_runtime_allocate(rt);
2657                 if (r < 0)
2658                         return r;
2659
2660                 copy = strdup(value);
2661                 if (!copy)
2662                         return log_oom();
2663
2664                 free((*rt)->var_tmp_dir);
2665                 (*rt)->var_tmp_dir = copy;
2666
2667         } else if (streq(key, "netns-socket-0")) {
2668                 int fd;
2669
2670                 r = exec_runtime_allocate(rt);
2671                 if (r < 0)
2672                         return r;
2673
2674                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2675                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2676                 else {
2677                         safe_close((*rt)->netns_storage_socket[0]);
2678                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2679                 }
2680         } else if (streq(key, "netns-socket-1")) {
2681                 int fd;
2682
2683                 r = exec_runtime_allocate(rt);
2684                 if (r < 0)
2685                         return r;
2686
2687                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2688                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2689                 else {
2690                         safe_close((*rt)->netns_storage_socket[1]);
2691                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2692                 }
2693         } else
2694                 return 0;
2695
2696         return 1;
2697 }
2698
/* Thread entry point: passes the path handed in via p to
 * rm_rf_dangerous() and frees the path string afterwards. This function
 * takes ownership of p. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2705
2706 void exec_runtime_destroy(ExecRuntime *rt) {
2707         int r;
2708
2709         if (!rt)
2710                 return;
2711
2712         /* If there are multiple users of this, let's leave the stuff around */
2713         if (rt->n_ref > 1)
2714                 return;
2715
2716         if (rt->tmp_dir) {
2717                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2718
2719                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2720                 if (r < 0) {
2721                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2722                         free(rt->tmp_dir);
2723                 }
2724
2725                 rt->tmp_dir = NULL;
2726         }
2727
2728         if (rt->var_tmp_dir) {
2729                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2730
2731                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2732                 if (r < 0) {
2733                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2734                         free(rt->var_tmp_dir);
2735                 }
2736
2737                 rt->var_tmp_dir = NULL;
2738         }
2739
2740         safe_close_pair(rt->netns_storage_socket);
2741 }
2742
2743 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2744         [EXEC_INPUT_NULL] = "null",
2745         [EXEC_INPUT_TTY] = "tty",
2746         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2747         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2748         [EXEC_INPUT_SOCKET] = "socket"
2749 };
2750
2751 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2752
2753 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2754         [EXEC_OUTPUT_INHERIT] = "inherit",
2755         [EXEC_OUTPUT_NULL] = "null",
2756         [EXEC_OUTPUT_TTY] = "tty",
2757         [EXEC_OUTPUT_SYSLOG] = "syslog",
2758         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2759         [EXEC_OUTPUT_KMSG] = "kmsg",
2760         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2761         [EXEC_OUTPUT_JOURNAL] = "journal",
2762         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2763         [EXEC_OUTPUT_SOCKET] = "socket"
2764 };
2765
2766 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);