chiark / gitweb /
b5b22472d5a9f8ff954ad409484e4cef7aaedca8
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
93
94 /* This assumes there is a 'tty' group */
95 #define TTY_MODE 0620
96
97 #define SNDBUF_SIZE (8*1024*1024)
98
/* Relocates the passed descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. the array occupies a contiguous range
 * right after stdin/stdout/stderr. The array is rewritten in place.
 * Returns 0 on success, or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3;
                        int moved;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* F_DUPFD hands out the lowest free descriptor
                         * >= target. If the slot was still occupied we
                         * got something else; remember the first such
                         * index and do another pass starting there. */
                        if (moved != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
142
/* Applies the requested O_NONBLOCK setting to every descriptor in the
 * array and clears FD_CLOEXEC on each of them. Returns 0 on success or
 * the first negative error reported by the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to the child process. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171         assert(context);
172
173         if (context->tty_path)
174                 return context->tty_path;
175
176         return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180         assert(context);
181
182         if (context->tty_vhangup)
183                 terminal_vhangup(tty_path(context));
184
185         if (context->tty_reset)
186                 reset_terminal(tty_path(context));
187
188         if (context->tty_vt_disallocate && context->tty_path)
189                 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193         return
194                 o == EXEC_OUTPUT_TTY ||
195                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given open(2) flags and makes it available
 * as descriptor nfd. Returns nfd on success, negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already gave us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(null_fd);

        return result;
}
217
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
219         int fd, r;
220         union sockaddr_union sa = {
221                 .un.sun_family = AF_UNIX,
222                 .un.sun_path = "/run/systemd/journal/stdout",
223         };
224
225         assert(context);
226         assert(output < _EXEC_OUTPUT_MAX);
227         assert(ident);
228         assert(nfd >= 0);
229
230         fd = socket(AF_UNIX, SOCK_STREAM, 0);
231         if (fd < 0)
232                 return -errno;
233
234         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
235         if (r < 0) {
236                 safe_close(fd);
237                 return -errno;
238         }
239
240         if (shutdown(fd, SHUT_RD) < 0) {
241                 safe_close(fd);
242                 return -errno;
243         }
244
245         fd_inc_sndbuf(fd, SNDBUF_SIZE);
246
247         dprintf(fd,
248                 "%s\n"
249                 "%s\n"
250                 "%i\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n",
255                 context->syslog_identifier ? context->syslog_identifier : ident,
256                 unit_id,
257                 context->syslog_priority,
258                 !!context->syslog_level_prefix,
259                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261                 is_terminal_output(output));
262
263         if (fd != nfd) {
264                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265                 safe_close(fd);
266         } else
267                 r = nfd;
268
269         return r;
270 }
/* Opens the terminal at the given path (the "mode" argument is passed
 * on as open flags, with O_NOCTTY added) and makes it available as
 * descriptor nfd. Returns nfd on success, a negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        safe_close(tty_fd);

        return result;
}
288
289 static bool is_terminal_input(ExecInput i) {
290         return
291                 i == EXEC_INPUT_TTY ||
292                 i == EXEC_INPUT_TTY_FORCE ||
293                 i == EXEC_INPUT_TTY_FAIL;
294 }
295
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
297
298         if (is_terminal_input(std_input) && !apply_tty_stdin)
299                 return EXEC_INPUT_NULL;
300
301         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302                 return EXEC_INPUT_NULL;
303
304         return std_input;
305 }
306
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
308
309         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310                 return EXEC_OUTPUT_INHERIT;
311
312         return std_output;
313 }
314
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
316         ExecInput i;
317
318         assert(context);
319
320         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
321
322         switch (i) {
323
324         case EXEC_INPUT_NULL:
325                 return open_null_as(O_RDONLY, STDIN_FILENO);
326
327         case EXEC_INPUT_TTY:
328         case EXEC_INPUT_TTY_FORCE:
329         case EXEC_INPUT_TTY_FAIL: {
330                 int fd, r;
331
332                 fd = acquire_terminal(tty_path(context),
333                                       i == EXEC_INPUT_TTY_FAIL,
334                                       i == EXEC_INPUT_TTY_FORCE,
335                                       false,
336                                       USEC_INFINITY);
337                 if (fd < 0)
338                         return fd;
339
340                 if (fd != STDIN_FILENO) {
341                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342                         safe_close(fd);
343                 } else
344                         r = STDIN_FILENO;
345
346                 return r;
347         }
348
349         case EXEC_INPUT_SOCKET:
350                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
351
352         default:
353                 assert_not_reached("Unknown input type");
354         }
355 }
356
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
358         ExecOutput o;
359         ExecInput i;
360         int r;
361
362         assert(context);
363         assert(ident);
364
365         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366         o = fixup_output(context->std_output, socket_fd);
367
368         if (fileno == STDERR_FILENO) {
369                 ExecOutput e;
370                 e = fixup_output(context->std_error, socket_fd);
371
372                 /* This expects the input and output are already set up */
373
374                 /* Don't change the stderr file descriptor if we inherit all
375                  * the way and are not on a tty */
376                 if (e == EXEC_OUTPUT_INHERIT &&
377                     o == EXEC_OUTPUT_INHERIT &&
378                     i == EXEC_INPUT_NULL &&
379                     !is_terminal_input(context->std_input) &&
380                     getppid () != 1)
381                         return fileno;
382
383                 /* Duplicate from stdout if possible */
384                 if (e == o || e == EXEC_OUTPUT_INHERIT)
385                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 o = e;
388
389         } else if (o == EXEC_OUTPUT_INHERIT) {
390                 /* If input got downgraded, inherit the original value */
391                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395                 if (i != EXEC_INPUT_NULL)
396                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
397
398                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
399                 if (getppid() != 1)
400                         return fileno;
401
402                 /* We need to open /dev/null here anew, to get the right access mode. */
403                 return open_null_as(O_WRONLY, fileno);
404         }
405
406         switch (o) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 r = connect_logger_as(context, o, ident, unit_id, fileno);
425                 if (r < 0) {
426                         log_struct_unit(LOG_CRIT, unit_id,
427                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428                                 fileno == STDOUT_FILENO ? "out" : "err",
429                                 unit_id, strerror(-r),
430                                 "ERRNO=%d", -r,
431                                 NULL);
432                         r = open_null_as(O_WRONLY, fileno);
433                 }
434                 return r;
435
436         case EXEC_OUTPUT_SOCKET:
437                 assert(socket_fd >= 0);
438                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
439
440         default:
441                 assert_not_reached("Unknown error type");
442         }
443 }
444
445 static int chown_terminal(int fd, uid_t uid) {
446         struct stat st;
447
448         assert(fd >= 0);
449
450         /* This might fail. What matters are the results. */
451         (void) fchown(fd, uid, -1);
452         (void) fchmod(fd, TTY_MODE);
453
454         if (fstat(fd, &st) < 0)
455                 return -errno;
456
457         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
458                 return -EPERM;
459
460         return 0;
461 }
462
463 static int setup_confirm_stdio(int *_saved_stdin,
464                                int *_saved_stdout) {
465         int fd = -1, saved_stdin, saved_stdout = -1, r;
466
467         assert(_saved_stdin);
468         assert(_saved_stdout);
469
470         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
471         if (saved_stdin < 0)
472                 return -errno;
473
474         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475         if (saved_stdout < 0) {
476                 r = errno;
477                 goto fail;
478         }
479
480         fd = acquire_terminal(
481                         "/dev/console",
482                         false,
483                         false,
484                         false,
485                         DEFAULT_CONFIRM_USEC);
486         if (fd < 0) {
487                 r = fd;
488                 goto fail;
489         }
490
491         r = chown_terminal(fd, getuid());
492         if (r < 0)
493                 goto fail;
494
495         if (dup2(fd, STDIN_FILENO) < 0) {
496                 r = -errno;
497                 goto fail;
498         }
499
500         if (dup2(fd, STDOUT_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (fd >= 2)
506                 safe_close(fd);
507
508         *_saved_stdin = saved_stdin;
509         *_saved_stdout = saved_stdout;
510
511         return 0;
512
513 fail:
514         safe_close(saved_stdout);
515         safe_close(saved_stdin);
516         safe_close(fd);
517
518         return r;
519 }
520
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522         _cleanup_close_ int fd = -1;
523         va_list ap;
524
525         assert(format);
526
527         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
528         if (fd < 0)
529                 return fd;
530
531         va_start(ap, format);
532         vdprintf(fd, format, ap);
533         va_end(ap);
534
535         return 0;
536 }
537
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin/stdout and closes the saved
 * copies. Returns 0, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
561
562 static int ask_for_confirmation(char *response, char **argv) {
563         int saved_stdout = -1, saved_stdin = -1, r;
564         _cleanup_free_ char *line = NULL;
565
566         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
567         if (r < 0)
568                 return r;
569
570         line = exec_command_line(argv);
571         if (!line)
572                 return -ENOMEM;
573
574         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
575
576         restore_confirm_stdio(&saved_stdin, &saved_stdout);
577
578         return r;
579 }
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 _cleanup_cap_free_ cap_t d = NULL;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, while we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 d = cap_dup(context->capabilities);
688                 if (!d)
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
693                         return -errno;
694
695                 if (cap_set_proc(d) < 0)
696                         return -errno;
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the current process to "(<name>)", where <name> is made up of
 * at most the last 8 characters of the base name of the given path. If
 * the base name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result must fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty
         * in /bin/ps. */
        char buf[11];
        const char *tail;
        size_t n;
        char *w;

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name, it usually is the more
                 * telling part, since the beginning might just be
                 * "systemd-" */
                tail += n - 8;
                n = 8;
        }

        w = buf;
        *w++ = '(';
        memcpy(w, tail, n);
        w += n;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
938
939 #ifdef HAVE_SECCOMP
940
941 static int apply_seccomp(ExecContext *c) {
942         uint32_t negative_action, action;
943         scmp_filter_ctx *seccomp;
944         Iterator i;
945         void *id;
946         int r;
947
948         assert(c);
949
950         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
951
952         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
953         if (!seccomp)
954                 return -ENOMEM;
955
956         if (c->syscall_archs) {
957
958                 SET_FOREACH(id, c->syscall_archs, i) {
959                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
960                         if (r == -EEXIST)
961                                 continue;
962                         if (r < 0)
963                                 goto finish;
964                 }
965
966         } else {
967                 r = seccomp_add_secondary_archs(seccomp);
968                 if (r < 0)
969                         goto finish;
970         }
971
972         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
973         SET_FOREACH(id, c->syscall_filter, i) {
974                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
975                 if (r < 0)
976                         goto finish;
977         }
978
979         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
980         if (r < 0)
981                 goto finish;
982
983         r = seccomp_load(seccomp);
984
985 finish:
986         seccomp_release(seccomp);
987         return r;
988 }
989
990 static int apply_address_families(ExecContext *c) {
991         scmp_filter_ctx *seccomp;
992         Iterator i;
993         int r;
994
995         assert(c);
996
997         seccomp = seccomp_init(SCMP_ACT_ALLOW);
998         if (!seccomp)
999                 return -ENOMEM;
1000
1001         r = seccomp_add_secondary_archs(seccomp);
1002         if (r < 0)
1003                 goto finish;
1004
1005         if (c->address_families_whitelist) {
1006                 int af, first = 0, last = 0;
1007                 void *afp;
1008
1009                 /* If this is a whitelist, we first block the address
1010                  * families that are out of range and then everything
1011                  * that is not in the set. First, we find the lowest
1012                  * and highest address family in the set. */
1013
1014                 SET_FOREACH(afp, c->address_families, i) {
1015                         af = PTR_TO_INT(afp);
1016
1017                         if (af <= 0 || af >= af_max())
1018                                 continue;
1019
1020                         if (first == 0 || af < first)
1021                                 first = af;
1022
1023                         if (last == 0 || af > last)
1024                                 last = af;
1025                 }
1026
1027                 assert((first == 0) == (last == 0));
1028
1029                 if (first == 0) {
1030
1031                         /* No entries in the valid range, block everything */
1032                         r = seccomp_rule_add(
1033                                         seccomp,
1034                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1035                                         SCMP_SYS(socket),
1036                                         0);
1037                         if (r < 0)
1038                                 goto finish;
1039
1040                 } else {
1041
1042                         /* Block everything below the first entry */
1043                         r = seccomp_rule_add(
1044                                         seccomp,
1045                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1046                                         SCMP_SYS(socket),
1047                                         1,
1048                                         SCMP_A0(SCMP_CMP_LT, first));
1049                         if (r < 0)
1050                                 goto finish;
1051
1052                         /* Block everything above the last entry */
1053                         r = seccomp_rule_add(
1054                                         seccomp,
1055                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1056                                         SCMP_SYS(socket),
1057                                         1,
1058                                         SCMP_A0(SCMP_CMP_GT, last));
1059                         if (r < 0)
1060                                 goto finish;
1061
1062                         /* Block everything between the first and last
1063                          * entry */
1064                         for (af = 1; af < af_max(); af++) {
1065
1066                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1067                                         continue;
1068
1069                                 r = seccomp_rule_add(
1070                                                 seccomp,
1071                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1072                                                 SCMP_SYS(socket),
1073                                                 1,
1074                                                 SCMP_A0(SCMP_CMP_EQ, af));
1075                                 if (r < 0)
1076                                         goto finish;
1077                         }
1078                 }
1079
1080         } else {
1081                 void *af;
1082
1083                 /* If this is a blacklist, then generate one rule for
1084                  * each address family that are then combined in OR
1085                  * checks. */
1086
1087                 SET_FOREACH(af, c->address_families, i) {
1088
1089                         r = seccomp_rule_add(
1090                                         seccomp,
1091                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092                                         SCMP_SYS(socket),
1093                                         1,
1094                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1095                         if (r < 0)
1096                                 goto finish;
1097                 }
1098         }
1099
1100         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1101         if (r < 0)
1102                 goto finish;
1103
1104         r = seccomp_load(seccomp);
1105
1106 finish:
1107         seccomp_release(seccomp);
1108         return r;
1109 }
1110
1111 #endif
1112
1113 static void do_idle_pipe_dance(int idle_pipe[4]) {
1114         assert(idle_pipe);
1115
1116
1117         safe_close(idle_pipe[1]);
1118         safe_close(idle_pipe[2]);
1119
1120         if (idle_pipe[0] >= 0) {
1121                 int r;
1122
1123                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1124
1125                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1126                         /* Signal systemd that we are bored and want to continue. */
1127                         write(idle_pipe[3], "x", 1);
1128
1129                         /* Wait for systemd to react to the signal above. */
1130                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1131                 }
1132
1133                 safe_close(idle_pipe[0]);
1134
1135         }
1136
1137         safe_close(idle_pipe[3]);
1138 }
1139
1140 static int build_environment(
1141                 ExecContext *c,
1142                 unsigned n_fds,
1143                 usec_t watchdog_usec,
1144                 const char *home,
1145                 const char *username,
1146                 const char *shell,
1147                 char ***ret) {
1148
1149         _cleanup_strv_free_ char **our_env = NULL;
1150         unsigned n_env = 0;
1151         char *x;
1152
1153         assert(c);
1154         assert(ret);
1155
1156         our_env = new0(char*, 10);
1157         if (!our_env)
1158                 return -ENOMEM;
1159
1160         if (n_fds > 0) {
1161                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1162                         return -ENOMEM;
1163                 our_env[n_env++] = x;
1164
1165                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1166                         return -ENOMEM;
1167                 our_env[n_env++] = x;
1168         }
1169
1170         if (watchdog_usec > 0) {
1171                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1172                         return -ENOMEM;
1173                 our_env[n_env++] = x;
1174
1175                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178         }
1179
1180         if (home) {
1181                 x = strappend("HOME=", home);
1182                 if (!x)
1183                         return -ENOMEM;
1184                 our_env[n_env++] = x;
1185         }
1186
1187         if (username) {
1188                 x = strappend("LOGNAME=", username);
1189                 if (!x)
1190                         return -ENOMEM;
1191                 our_env[n_env++] = x;
1192
1193                 x = strappend("USER=", username);
1194                 if (!x)
1195                         return -ENOMEM;
1196                 our_env[n_env++] = x;
1197         }
1198
1199         if (shell) {
1200                 x = strappend("SHELL=", shell);
1201                 if (!x)
1202                         return -ENOMEM;
1203                 our_env[n_env++] = x;
1204         }
1205
1206         if (is_terminal_input(c->std_input) ||
1207             c->std_output == EXEC_OUTPUT_TTY ||
1208             c->std_error == EXEC_OUTPUT_TTY ||
1209             c->tty_path) {
1210
1211                 x = strdup(default_term_for_tty(tty_path(c)));
1212                 if (!x)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215         }
1216
1217         our_env[n_env++] = NULL;
1218         assert(n_env <= 10);
1219
1220         *ret = our_env;
1221         our_env = NULL;
1222
1223         return 0;
1224 }
1225
1226 int exec_spawn(ExecCommand *command,
1227                char **argv,
1228                ExecContext *context,
1229                int fds[], unsigned n_fds,
1230                char **environment,
1231                bool apply_permissions,
1232                bool apply_chroot,
1233                bool apply_tty_stdin,
1234                bool confirm_spawn,
1235                CGroupControllerMask cgroup_supported,
1236                const char *cgroup_path,
1237                const char *runtime_prefix,
1238                const char *unit_id,
1239                usec_t watchdog_usec,
1240                int idle_pipe[4],
1241                ExecRuntime *runtime,
1242                pid_t *ret) {
1243
1244         _cleanup_strv_free_ char **files_env = NULL;
1245         int socket_fd;
1246         char *line;
1247         pid_t pid;
1248         int r;
1249
1250         assert(command);
1251         assert(context);
1252         assert(ret);
1253         assert(fds || n_fds <= 0);
1254
1255         if (context->std_input == EXEC_INPUT_SOCKET ||
1256             context->std_output == EXEC_OUTPUT_SOCKET ||
1257             context->std_error == EXEC_OUTPUT_SOCKET) {
1258
1259                 if (n_fds != 1)
1260                         return -EINVAL;
1261
1262                 socket_fd = fds[0];
1263
1264                 fds = NULL;
1265                 n_fds = 0;
1266         } else
1267                 socket_fd = -1;
1268
1269         r = exec_context_load_environment(context, &files_env);
1270         if (r < 0) {
1271                 log_struct_unit(LOG_ERR,
1272                            unit_id,
1273                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1274                            "ERRNO=%d", -r,
1275                            NULL);
1276                 return r;
1277         }
1278
1279         if (!argv)
1280                 argv = command->argv;
1281
1282         line = exec_command_line(argv);
1283         if (!line)
1284                 return log_oom();
1285
1286         log_struct_unit(LOG_DEBUG,
1287                         unit_id,
1288                         "EXECUTABLE=%s", command->path,
1289                         "MESSAGE=About to execute: %s", line,
1290                         NULL);
1291         free(line);
1292
1293         pid = fork();
1294         if (pid < 0)
1295                 return -errno;
1296
1297         if (pid == 0) {
1298                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1299                 const char *username = NULL, *home = NULL, *shell = NULL;
1300                 unsigned n_dont_close = 0;
1301                 int dont_close[n_fds + 3];
1302                 uid_t uid = (uid_t) -1;
1303                 gid_t gid = (gid_t) -1;
1304                 sigset_t ss;
1305                 int i, err;
1306
1307                 /* child */
1308
1309                 rename_process_from_path(command->path);
1310
1311                 /* We reset exactly these signals, since they are the
1312                  * only ones we set to SIG_IGN in the main daemon. All
1313                  * others we leave untouched because we set them to
1314                  * SIG_DFL or a valid handler initially, both of which
1315                  * will be demoted to SIG_DFL. */
1316                 default_signals(SIGNALS_CRASH_HANDLER,
1317                                 SIGNALS_IGNORE, -1);
1318
1319                 if (context->ignore_sigpipe)
1320                         ignore_signals(SIGPIPE, -1);
1321
1322                 assert_se(sigemptyset(&ss) == 0);
1323                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1324                         err = -errno;
1325                         r = EXIT_SIGNAL_MASK;
1326                         goto fail_child;
1327                 }
1328
1329                 if (idle_pipe)
1330                         do_idle_pipe_dance(idle_pipe);
1331
1332                 /* Close sockets very early to make sure we don't
1333                  * block init reexecution because it cannot bind its
1334                  * sockets */
1335                 log_forget_fds();
1336
1337                 if (socket_fd >= 0)
1338                         dont_close[n_dont_close++] = socket_fd;
1339                 if (n_fds > 0) {
1340                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1341                         n_dont_close += n_fds;
1342                 }
1343                 if (runtime) {
1344                         if (runtime->netns_storage_socket[0] >= 0)
1345                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1346                         if (runtime->netns_storage_socket[1] >= 0)
1347                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1348                 }
1349
1350                 err = close_all_fds(dont_close, n_dont_close);
1351                 if (err < 0) {
1352                         r = EXIT_FDS;
1353                         goto fail_child;
1354                 }
1355
1356                 if (!context->same_pgrp)
1357                         if (setsid() < 0) {
1358                                 err = -errno;
1359                                 r = EXIT_SETSID;
1360                                 goto fail_child;
1361                         }
1362
1363                 exec_context_tty_reset(context);
1364
1365                 if (confirm_spawn) {
1366                         char response;
1367
1368                         err = ask_for_confirmation(&response, argv);
1369                         if (err == -ETIMEDOUT)
1370                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1371                         else if (err < 0)
1372                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1373                         else if (response == 's') {
1374                                 write_confirm_message("Skipping execution.\n");
1375                                 err = -ECANCELED;
1376                                 r = EXIT_CONFIRM;
1377                                 goto fail_child;
1378                         } else if (response == 'n') {
1379                                 write_confirm_message("Failing execution.\n");
1380                                 err = r = 0;
1381                                 goto fail_child;
1382                         }
1383                 }
1384
1385                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1386                  * must sure to drop O_NONBLOCK */
1387                 if (socket_fd >= 0)
1388                         fd_nonblock(socket_fd, false);
1389
1390                 err = setup_input(context, socket_fd, apply_tty_stdin);
1391                 if (err < 0) {
1392                         r = EXIT_STDIN;
1393                         goto fail_child;
1394                 }
1395
1396                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1397                 if (err < 0) {
1398                         r = EXIT_STDOUT;
1399                         goto fail_child;
1400                 }
1401
1402                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1403                 if (err < 0) {
1404                         r = EXIT_STDERR;
1405                         goto fail_child;
1406                 }
1407
1408                 if (cgroup_path) {
1409                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1410                         if (err < 0) {
1411                                 r = EXIT_CGROUP;
1412                                 goto fail_child;
1413                         }
1414                 }
1415
1416                 if (context->oom_score_adjust_set) {
1417                         char t[16];
1418
1419                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1420                         char_array_0(t);
1421
1422                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1423                                 err = -errno;
1424                                 r = EXIT_OOM_ADJUST;
1425                                 goto fail_child;
1426                         }
1427                 }
1428
1429                 if (context->nice_set)
1430                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1431                                 err = -errno;
1432                                 r = EXIT_NICE;
1433                                 goto fail_child;
1434                         }
1435
1436                 if (context->cpu_sched_set) {
1437                         struct sched_param param = {
1438                                 .sched_priority = context->cpu_sched_priority,
1439                         };
1440
1441                         r = sched_setscheduler(0,
1442                                                context->cpu_sched_policy |
1443                                                (context->cpu_sched_reset_on_fork ?
1444                                                 SCHED_RESET_ON_FORK : 0),
1445                                                &param);
1446                         if (r < 0) {
1447                                 err = -errno;
1448                                 r = EXIT_SETSCHEDULER;
1449                                 goto fail_child;
1450                         }
1451                 }
1452
1453                 if (context->cpuset)
1454                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1455                                 err = -errno;
1456                                 r = EXIT_CPUAFFINITY;
1457                                 goto fail_child;
1458                         }
1459
1460                 if (context->ioprio_set)
1461                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1462                                 err = -errno;
1463                                 r = EXIT_IOPRIO;
1464                                 goto fail_child;
1465                         }
1466
1467                 if (context->timer_slack_nsec != NSEC_INFINITY)
1468                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1469                                 err = -errno;
1470                                 r = EXIT_TIMERSLACK;
1471                                 goto fail_child;
1472                         }
1473
1474                 if (context->personality != 0xffffffffUL)
1475                         if (personality(context->personality) < 0) {
1476                                 err = -errno;
1477                                 r = EXIT_PERSONALITY;
1478                                 goto fail_child;
1479                         }
1480
1481                 if (context->utmp_id)
1482                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1483
1484                 if (context->user) {
1485                         username = context->user;
1486                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1487                         if (err < 0) {
1488                                 r = EXIT_USER;
1489                                 goto fail_child;
1490                         }
1491
1492                         if (is_terminal_input(context->std_input)) {
1493                                 err = chown_terminal(STDIN_FILENO, uid);
1494                                 if (err < 0) {
1495                                         r = EXIT_STDIN;
1496                                         goto fail_child;
1497                                 }
1498                         }
1499                 }
1500
1501 #ifdef HAVE_PAM
1502                 if (cgroup_path && context->user && context->pam_name) {
1503                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1504                         if (err < 0) {
1505                                 r = EXIT_CGROUP;
1506                                 goto fail_child;
1507                         }
1508
1509
1510                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1511                         if (err < 0) {
1512                                 r = EXIT_CGROUP;
1513                                 goto fail_child;
1514                         }
1515                 }
1516 #endif
1517
1518                 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1519                         char **rt;
1520
1521                         STRV_FOREACH(rt, context->runtime_directory) {
1522                                 _cleanup_free_ char *p;
1523
1524                                 p = strjoin(runtime_prefix, "/", *rt, NULL);
1525                                 if (!p) {
1526                                         r = EXIT_RUNTIME_DIRECTORY;
1527                                         err = -ENOMEM;
1528                                         goto fail_child;
1529                                 }
1530
1531                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1532                                 if (err < 0) {
1533                                         r = EXIT_RUNTIME_DIRECTORY;
1534                                         goto fail_child;
1535                                 }
1536                         }
1537                 }
1538
1539                 if (apply_permissions) {
1540                         err = enforce_groups(context, username, gid);
1541                         if (err < 0) {
1542                                 r = EXIT_GROUP;
1543                                 goto fail_child;
1544                         }
1545                 }
1546
1547                 umask(context->umask);
1548
1549 #ifdef HAVE_PAM
1550                 if (apply_permissions && context->pam_name && username) {
1551                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1552                         if (err < 0) {
1553                                 r = EXIT_PAM;
1554                                 goto fail_child;
1555                         }
1556                 }
1557 #endif
1558                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1559                         err = setup_netns(runtime->netns_storage_socket);
1560                         if (err < 0) {
1561                                 r = EXIT_NETWORK;
1562                                 goto fail_child;
1563                         }
1564                 }
1565
1566                 if (!strv_isempty(context->read_write_dirs) ||
1567                     !strv_isempty(context->read_only_dirs) ||
1568                     !strv_isempty(context->inaccessible_dirs) ||
1569                     context->mount_flags != 0 ||
1570                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1571                     context->private_devices ||
1572                     context->protect_system != PROTECT_SYSTEM_NO ||
1573                     context->protect_home != PROTECT_HOME_NO) {
1574
1575                         char *tmp = NULL, *var = NULL;
1576
1577                         /* The runtime struct only contains the parent
1578                          * of the private /tmp, which is
1579                          * non-accessible to world users. Inside of it
1580                          * there's a /tmp that is sticky, and that's
1581                          * the one we want to use here. */
1582
1583                         if (context->private_tmp && runtime) {
1584                                 if (runtime->tmp_dir)
1585                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1586                                 if (runtime->var_tmp_dir)
1587                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1588                         }
1589
1590                         err = setup_namespace(
1591                                         context->read_write_dirs,
1592                                         context->read_only_dirs,
1593                                         context->inaccessible_dirs,
1594                                         tmp,
1595                                         var,
1596                                         context->private_devices,
1597                                         context->protect_home,
1598                                         context->protect_system,
1599                                         context->mount_flags);
1600                         if (err < 0) {
1601                                 r = EXIT_NAMESPACE;
1602                                 goto fail_child;
1603                         }
1604                 }
1605
1606                 if (apply_chroot) {
1607                         if (context->root_directory)
1608                                 if (chroot(context->root_directory) < 0) {
1609                                         err = -errno;
1610                                         r = EXIT_CHROOT;
1611                                         goto fail_child;
1612                                 }
1613
1614                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1615                                 err = -errno;
1616                                 r = EXIT_CHDIR;
1617                                 goto fail_child;
1618                         }
1619                 } else {
1620                         _cleanup_free_ char *d = NULL;
1621
1622                         if (asprintf(&d, "%s/%s",
1623                                      context->root_directory ? context->root_directory : "",
1624                                      context->working_directory ? context->working_directory : "") < 0) {
1625                                 err = -ENOMEM;
1626                                 r = EXIT_MEMORY;
1627                                 goto fail_child;
1628                         }
1629
1630                         if (chdir(d) < 0) {
1631                                 err = -errno;
1632                                 r = EXIT_CHDIR;
1633                                 goto fail_child;
1634                         }
1635                 }
1636
1637                 /* We repeat the fd closing here, to make sure that
1638                  * nothing is leaked from the PAM modules. Note that
1639                  * we are more aggressive this time since socket_fd
1640                  * and the netns fds we don#t need anymore. */
1641                 err = close_all_fds(fds, n_fds);
1642                 if (err >= 0)
1643                         err = shift_fds(fds, n_fds);
1644                 if (err >= 0)
1645                         err = flags_fds(fds, n_fds, context->non_blocking);
1646                 if (err < 0) {
1647                         r = EXIT_FDS;
1648                         goto fail_child;
1649                 }
1650
1651                 if (apply_permissions) {
1652
1653                         for (i = 0; i < _RLIMIT_MAX; i++) {
1654                                 if (!context->rlimit[i])
1655                                         continue;
1656
1657                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1658                                         err = -errno;
1659                                         r = EXIT_LIMITS;
1660                                         goto fail_child;
1661                                 }
1662                         }
1663
1664                         if (context->capability_bounding_set_drop) {
1665                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1666                                 if (err < 0) {
1667                                         r = EXIT_CAPABILITIES;
1668                                         goto fail_child;
1669                                 }
1670                         }
1671
1672                         if (context->user) {
1673                                 err = enforce_user(context, uid);
1674                                 if (err < 0) {
1675                                         r = EXIT_USER;
1676                                         goto fail_child;
1677                                 }
1678                         }
1679
1680                         /* PR_GET_SECUREBITS is not privileged, while
1681                          * PR_SET_SECUREBITS is. So to suppress
1682                          * potential EPERMs we'll try not to call
1683                          * PR_SET_SECUREBITS unless necessary. */
1684                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1685                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1686                                         err = -errno;
1687                                         r = EXIT_SECUREBITS;
1688                                         goto fail_child;
1689                                 }
1690
1691                         if (context->capabilities)
1692                                 if (cap_set_proc(context->capabilities) < 0) {
1693                                         err = -errno;
1694                                         r = EXIT_CAPABILITIES;
1695                                         goto fail_child;
1696                                 }
1697
1698                         if (context->no_new_privileges)
1699                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1700                                         err = -errno;
1701                                         r = EXIT_NO_NEW_PRIVILEGES;
1702                                         goto fail_child;
1703                                 }
1704
1705 #ifdef HAVE_SECCOMP
1706                         if (context->address_families_whitelist ||
1707                             !set_isempty(context->address_families)) {
1708                                 err = apply_address_families(context);
1709                                 if (err < 0) {
1710                                         r = EXIT_ADDRESS_FAMILIES;
1711                                         goto fail_child;
1712                                 }
1713                         }
1714
1715                         if (context->syscall_whitelist ||
1716                             !set_isempty(context->syscall_filter) ||
1717                             !set_isempty(context->syscall_archs)) {
1718                                 err = apply_seccomp(context);
1719                                 if (err < 0) {
1720                                         r = EXIT_SECCOMP;
1721                                         goto fail_child;
1722                                 }
1723                         }
1724 #endif
1725
1726 #ifdef HAVE_SELINUX
1727                         if (context->selinux_context && use_selinux()) {
1728                                 err = setexeccon(context->selinux_context);
1729                                 if (err < 0 && !context->selinux_context_ignore) {
1730                                         r = EXIT_SELINUX_CONTEXT;
1731                                         goto fail_child;
1732                                 }
1733                         }
1734 #endif
1735
1736 #ifdef HAVE_APPARMOR
1737                         if (context->apparmor_profile && use_apparmor()) {
1738                                 err = aa_change_onexec(context->apparmor_profile);
1739                                 if (err < 0 && !context->apparmor_profile_ignore) {
1740                                         r = EXIT_APPARMOR_PROFILE;
1741                                         goto fail_child;
1742                                 }
1743                         }
1744 #endif
1745                 }
1746
1747                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1748                 if (r < 0) {
1749                         r = EXIT_MEMORY;
1750                         goto fail_child;
1751                 }
1752
1753                 final_env = strv_env_merge(5,
1754                                            environment,
1755                                            our_env,
1756                                            context->environment,
1757                                            files_env,
1758                                            pam_env,
1759                                            NULL);
1760                 if (!final_env) {
1761                         err = -ENOMEM;
1762                         r = EXIT_MEMORY;
1763                         goto fail_child;
1764                 }
1765
1766                 final_argv = replace_env_argv(argv, final_env);
1767                 if (!final_argv) {
1768                         err = -ENOMEM;
1769                         r = EXIT_MEMORY;
1770                         goto fail_child;
1771                 }
1772
1773                 final_env = strv_env_clean(final_env);
1774
1775                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1776                         line = exec_command_line(final_argv);
1777                         if (line) {
1778                                 log_open();
1779                                 log_struct_unit(LOG_DEBUG,
1780                                                 unit_id,
1781                                                 "EXECUTABLE=%s", command->path,
1782                                                 "MESSAGE=Executing: %s", line,
1783                                                 NULL);
1784                                 log_close();
1785                                 free(line);
1786                                 line = NULL;
1787                         }
1788                 }
1789                 execve(command->path, final_argv, final_env);
1790                 err = -errno;
1791                 r = EXIT_EXEC;
1792
1793         fail_child:
1794                 if (r != 0) {
1795                         log_open();
1796                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1797                                    "EXECUTABLE=%s", command->path,
1798                                    "MESSAGE=Failed at step %s spawning %s: %s",
1799                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1800                                           command->path, strerror(-err),
1801                                    "ERRNO=%d", -err,
1802                                    NULL);
1803                         log_close();
1804                 }
1805
1806                 _exit(r);
1807         }
1808
1809         log_struct_unit(LOG_DEBUG,
1810                         unit_id,
1811                         "MESSAGE=Forked %s as "PID_FMT,
1812                         command->path, pid,
1813                         NULL);
1814
1815         /* We add the new process to the cgroup both in the child (so
1816          * that we can be sure that no user code is ever executed
1817          * outside of the cgroup) and in the parent (so that we can be
1818          * sure that when we kill the cgroup the process will be
1819          * killed too). */
1820         if (cgroup_path)
1821                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1822
1823         exec_status_start(&command->exec_status, pid);
1824
1825         *ret = pid;
1826         return 0;
1827 }
1828
1829 void exec_context_init(ExecContext *c) {
1830         assert(c);
1831
1832         c->umask = 0022;
1833         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1834         c->cpu_sched_policy = SCHED_OTHER;
1835         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1836         c->syslog_level_prefix = true;
1837         c->ignore_sigpipe = true;
1838         c->timer_slack_nsec = NSEC_INFINITY;
1839         c->personality = 0xffffffffUL;
1840         c->runtime_directory_mode = 0755;
1841 }
1842
1843 void exec_context_done(ExecContext *c) {
1844         unsigned l;
1845
1846         assert(c);
1847
1848         strv_free(c->environment);
1849         c->environment = NULL;
1850
1851         strv_free(c->environment_files);
1852         c->environment_files = NULL;
1853
1854         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1855                 free(c->rlimit[l]);
1856                 c->rlimit[l] = NULL;
1857         }
1858
1859         free(c->working_directory);
1860         c->working_directory = NULL;
1861         free(c->root_directory);
1862         c->root_directory = NULL;
1863
1864         free(c->tty_path);
1865         c->tty_path = NULL;
1866
1867         free(c->syslog_identifier);
1868         c->syslog_identifier = NULL;
1869
1870         free(c->user);
1871         c->user = NULL;
1872
1873         free(c->group);
1874         c->group = NULL;
1875
1876         strv_free(c->supplementary_groups);
1877         c->supplementary_groups = NULL;
1878
1879         free(c->pam_name);
1880         c->pam_name = NULL;
1881
1882         if (c->capabilities) {
1883                 cap_free(c->capabilities);
1884                 c->capabilities = NULL;
1885         }
1886
1887         strv_free(c->read_only_dirs);
1888         c->read_only_dirs = NULL;
1889
1890         strv_free(c->read_write_dirs);
1891         c->read_write_dirs = NULL;
1892
1893         strv_free(c->inaccessible_dirs);
1894         c->inaccessible_dirs = NULL;
1895
1896         if (c->cpuset)
1897                 CPU_FREE(c->cpuset);
1898
1899         free(c->utmp_id);
1900         c->utmp_id = NULL;
1901
1902         free(c->selinux_context);
1903         c->selinux_context = NULL;
1904
1905         free(c->apparmor_profile);
1906         c->apparmor_profile = NULL;
1907
1908         set_free(c->syscall_filter);
1909         c->syscall_filter = NULL;
1910
1911         set_free(c->syscall_archs);
1912         c->syscall_archs = NULL;
1913
1914         set_free(c->address_families);
1915         c->address_families = NULL;
1916
1917         strv_free(c->runtime_directory);
1918         c->runtime_directory = NULL;
1919 }
1920
1921 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1922         char **i;
1923
1924         assert(c);
1925
1926         if (!runtime_prefix)
1927                 return 0;
1928
1929         STRV_FOREACH(i, c->runtime_directory) {
1930                 _cleanup_free_ char *p;
1931
1932                 p = strjoin(runtime_prefix, "/", *i, NULL);
1933                 if (!p)
1934                         return -ENOMEM;
1935
1936                 /* We execute this synchronously, since we need to be
1937                  * sure this is gone when we start the service
1938                  * next. */
1939                 rm_rf_dangerous(p, false, true, false);
1940         }
1941
1942         return 0;
1943 }
1944
1945 void exec_command_done(ExecCommand *c) {
1946         assert(c);
1947
1948         free(c->path);
1949         c->path = NULL;
1950
1951         strv_free(c->argv);
1952         c->argv = NULL;
1953 }
1954
1955 void exec_command_done_array(ExecCommand *c, unsigned n) {
1956         unsigned i;
1957
1958         for (i = 0; i < n; i++)
1959                 exec_command_done(c+i);
1960 }
1961
1962 void exec_command_free_list(ExecCommand *c) {
1963         ExecCommand *i;
1964
1965         while ((i = c)) {
1966                 LIST_REMOVE(command, c, i);
1967                 exec_command_done(i);
1968                 free(i);
1969         }
1970 }
1971
1972 void exec_command_free_array(ExecCommand **c, unsigned n) {
1973         unsigned i;
1974
1975         for (i = 0; i < n; i++) {
1976                 exec_command_free_list(c[i]);
1977                 c[i] = NULL;
1978         }
1979 }
1980
1981 int exec_context_load_environment(const ExecContext *c, char ***l) {
1982         char **i, **r = NULL;
1983
1984         assert(c);
1985         assert(l);
1986
1987         STRV_FOREACH(i, c->environment_files) {
1988                 char *fn;
1989                 int k;
1990                 bool ignore = false;
1991                 char **p;
1992                 _cleanup_globfree_ glob_t pglob = {};
1993                 int count, n;
1994
1995                 fn = *i;
1996
1997                 if (fn[0] == '-') {
1998                         ignore = true;
1999                         fn ++;
2000                 }
2001
2002                 if (!path_is_absolute(fn)) {
2003                         if (ignore)
2004                                 continue;
2005
2006                         strv_free(r);
2007                         return -EINVAL;
2008                 }
2009
2010                 /* Filename supports globbing, take all matching files */
2011                 errno = 0;
2012                 if (glob(fn, 0, NULL, &pglob) != 0) {
2013                         if (ignore)
2014                                 continue;
2015
2016                         strv_free(r);
2017                         return errno ? -errno : -EINVAL;
2018                 }
2019                 count = pglob.gl_pathc;
2020                 if (count == 0) {
2021                         if (ignore)
2022                                 continue;
2023
2024                         strv_free(r);
2025                         return -EINVAL;
2026                 }
2027                 for (n = 0; n < count; n++) {
2028                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2029                         if (k < 0) {
2030                                 if (ignore)
2031                                         continue;
2032
2033                                 strv_free(r);
2034                                 return k;
2035                         }
2036                         /* Log invalid environment variables with filename */
2037                         if (p)
2038                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2039
2040                         if (r == NULL)
2041                                 r = p;
2042                         else {
2043                                 char **m;
2044
2045                                 m = strv_env_merge(2, r, p);
2046                                 strv_free(r);
2047                                 strv_free(p);
2048                                 if (!m)
2049                                         return -ENOMEM;
2050
2051                                 r = m;
2052                         }
2053                 }
2054         }
2055
2056         *l = r;
2057
2058         return 0;
2059 }
2060
2061 static bool tty_may_match_dev_console(const char *tty) {
2062         _cleanup_free_ char *active = NULL;
2063        char *console;
2064
2065         if (startswith(tty, "/dev/"))
2066                 tty += 5;
2067
2068         /* trivial identity? */
2069         if (streq(tty, "console"))
2070                 return true;
2071
2072         console = resolve_dev_console(&active);
2073         /* if we could not resolve, assume it may */
2074         if (!console)
2075                 return true;
2076
2077         /* "tty0" means the active VC, so it may be the same sometimes */
2078         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2079 }
2080
2081 bool exec_context_may_touch_console(ExecContext *ec) {
2082         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2083                 is_terminal_input(ec->std_input) ||
2084                 is_terminal_output(ec->std_output) ||
2085                 is_terminal_output(ec->std_error)) &&
2086                tty_may_match_dev_console(tty_path(ec));
2087 }
2088
2089 static void strv_fprintf(FILE *f, char **l) {
2090         char **g;
2091
2092         assert(f);
2093
2094         STRV_FOREACH(g, l)
2095                 fprintf(f, " %s", *g);
2096 }
2097
2098 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2099         char **e;
2100         unsigned i;
2101
2102         assert(c);
2103         assert(f);
2104
2105         prefix = strempty(prefix);
2106
2107         fprintf(f,
2108                 "%sUMask: %04o\n"
2109                 "%sWorkingDirectory: %s\n"
2110                 "%sRootDirectory: %s\n"
2111                 "%sNonBlocking: %s\n"
2112                 "%sPrivateTmp: %s\n"
2113                 "%sPrivateNetwork: %s\n"
2114                 "%sPrivateDevices: %s\n"
2115                 "%sProtectHome: %s\n"
2116                 "%sProtectSystem: %s\n"
2117                 "%sIgnoreSIGPIPE: %s\n",
2118                 prefix, c->umask,
2119                 prefix, c->working_directory ? c->working_directory : "/",
2120                 prefix, c->root_directory ? c->root_directory : "/",
2121                 prefix, yes_no(c->non_blocking),
2122                 prefix, yes_no(c->private_tmp),
2123                 prefix, yes_no(c->private_network),
2124                 prefix, yes_no(c->private_devices),
2125                 prefix, protect_home_to_string(c->protect_home),
2126                 prefix, protect_system_to_string(c->protect_system),
2127                 prefix, yes_no(c->ignore_sigpipe));
2128
2129         STRV_FOREACH(e, c->environment)
2130                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2131
2132         STRV_FOREACH(e, c->environment_files)
2133                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2134
2135         if (c->nice_set)
2136                 fprintf(f,
2137                         "%sNice: %i\n",
2138                         prefix, c->nice);
2139
2140         if (c->oom_score_adjust_set)
2141                 fprintf(f,
2142                         "%sOOMScoreAdjust: %i\n",
2143                         prefix, c->oom_score_adjust);
2144
2145         for (i = 0; i < RLIM_NLIMITS; i++)
2146                 if (c->rlimit[i])
2147                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2148                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2149
2150         if (c->ioprio_set) {
2151                 _cleanup_free_ char *class_str = NULL;
2152
2153                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2154                 fprintf(f,
2155                         "%sIOSchedulingClass: %s\n"
2156                         "%sIOPriority: %i\n",
2157                         prefix, strna(class_str),
2158                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2159         }
2160
2161         if (c->cpu_sched_set) {
2162                 _cleanup_free_ char *policy_str = NULL;
2163
2164                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2165                 fprintf(f,
2166                         "%sCPUSchedulingPolicy: %s\n"
2167                         "%sCPUSchedulingPriority: %i\n"
2168                         "%sCPUSchedulingResetOnFork: %s\n",
2169                         prefix, strna(policy_str),
2170                         prefix, c->cpu_sched_priority,
2171                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2172         }
2173
2174         if (c->cpuset) {
2175                 fprintf(f, "%sCPUAffinity:", prefix);
2176                 for (i = 0; i < c->cpuset_ncpus; i++)
2177                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2178                                 fprintf(f, " %u", i);
2179                 fputs("\n", f);
2180         }
2181
2182         if (c->timer_slack_nsec != NSEC_INFINITY)
2183                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2184
2185         fprintf(f,
2186                 "%sStandardInput: %s\n"
2187                 "%sStandardOutput: %s\n"
2188                 "%sStandardError: %s\n",
2189                 prefix, exec_input_to_string(c->std_input),
2190                 prefix, exec_output_to_string(c->std_output),
2191                 prefix, exec_output_to_string(c->std_error));
2192
2193         if (c->tty_path)
2194                 fprintf(f,
2195                         "%sTTYPath: %s\n"
2196                         "%sTTYReset: %s\n"
2197                         "%sTTYVHangup: %s\n"
2198                         "%sTTYVTDisallocate: %s\n",
2199                         prefix, c->tty_path,
2200                         prefix, yes_no(c->tty_reset),
2201                         prefix, yes_no(c->tty_vhangup),
2202                         prefix, yes_no(c->tty_vt_disallocate));
2203
2204         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2205             c->std_output == EXEC_OUTPUT_KMSG ||
2206             c->std_output == EXEC_OUTPUT_JOURNAL ||
2207             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2208             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2209             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2210             c->std_error == EXEC_OUTPUT_SYSLOG ||
2211             c->std_error == EXEC_OUTPUT_KMSG ||
2212             c->std_error == EXEC_OUTPUT_JOURNAL ||
2213             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2214             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2215             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2216
2217                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2218
2219                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2220                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2221
2222                 fprintf(f,
2223                         "%sSyslogFacility: %s\n"
2224                         "%sSyslogLevel: %s\n",
2225                         prefix, strna(fac_str),
2226                         prefix, strna(lvl_str));
2227         }
2228
2229         if (c->capabilities) {
2230                 _cleanup_cap_free_charp_ char *t;
2231
2232                 t = cap_to_text(c->capabilities, NULL);
2233                 if (t)
2234                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2235         }
2236
2237         if (c->secure_bits)
2238                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2239                         prefix,
2240                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2241                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2242                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2243                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2244                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2245                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2246
2247         if (c->capability_bounding_set_drop) {
2248                 unsigned long l;
2249                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2250
2251                 for (l = 0; l <= cap_last_cap(); l++)
2252                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2253                                 _cleanup_cap_free_charp_ char *t;
2254
2255                                 t = cap_to_name(l);
2256                                 if (t)
2257                                         fprintf(f, " %s", t);
2258                         }
2259
2260                 fputs("\n", f);
2261         }
2262
2263         if (c->user)
2264                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2265         if (c->group)
2266                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2267
2268         if (strv_length(c->supplementary_groups) > 0) {
2269                 fprintf(f, "%sSupplementaryGroups:", prefix);
2270                 strv_fprintf(f, c->supplementary_groups);
2271                 fputs("\n", f);
2272         }
2273
2274         if (c->pam_name)
2275                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2276
2277         if (strv_length(c->read_write_dirs) > 0) {
2278                 fprintf(f, "%sReadWriteDirs:", prefix);
2279                 strv_fprintf(f, c->read_write_dirs);
2280                 fputs("\n", f);
2281         }
2282
2283         if (strv_length(c->read_only_dirs) > 0) {
2284                 fprintf(f, "%sReadOnlyDirs:", prefix);
2285                 strv_fprintf(f, c->read_only_dirs);
2286                 fputs("\n", f);
2287         }
2288
2289         if (strv_length(c->inaccessible_dirs) > 0) {
2290                 fprintf(f, "%sInaccessibleDirs:", prefix);
2291                 strv_fprintf(f, c->inaccessible_dirs);
2292                 fputs("\n", f);
2293         }
2294
2295         if (c->utmp_id)
2296                 fprintf(f,
2297                         "%sUtmpIdentifier: %s\n",
2298                         prefix, c->utmp_id);
2299
2300         if (c->selinux_context)
2301                 fprintf(f,
2302                         "%sSELinuxContext: %s%s\n",
2303                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2304
2305         if (c->personality != 0xffffffffUL)
2306                 fprintf(f,
2307                         "%sPersonality: %s\n",
2308                         prefix, strna(personality_to_string(c->personality)));
2309
2310         if (c->syscall_filter) {
2311 #ifdef HAVE_SECCOMP
2312                 Iterator j;
2313                 void *id;
2314                 bool first = true;
2315 #endif
2316
2317                 fprintf(f,
2318                         "%sSystemCallFilter: ",
2319                         prefix);
2320
2321                 if (!c->syscall_whitelist)
2322                         fputc('~', f);
2323
2324 #ifdef HAVE_SECCOMP
2325                 SET_FOREACH(id, c->syscall_filter, j) {
2326                         _cleanup_free_ char *name = NULL;
2327
2328                         if (first)
2329                                 first = false;
2330                         else
2331                                 fputc(' ', f);
2332
2333                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2334                         fputs(strna(name), f);
2335                 }
2336 #endif
2337
2338                 fputc('\n', f);
2339         }
2340
2341         if (c->syscall_archs) {
2342 #ifdef HAVE_SECCOMP
2343                 Iterator j;
2344                 void *id;
2345 #endif
2346
2347                 fprintf(f,
2348                         "%sSystemCallArchitectures:",
2349                         prefix);
2350
2351 #ifdef HAVE_SECCOMP
2352                 SET_FOREACH(id, c->syscall_archs, j)
2353                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2354 #endif
2355                 fputc('\n', f);
2356         }
2357
2358         if (c->syscall_errno != 0)
2359                 fprintf(f,
2360                         "%sSystemCallErrorNumber: %s\n",
2361                         prefix, strna(errno_to_name(c->syscall_errno)));
2362
2363         if (c->apparmor_profile)
2364                 fprintf(f,
2365                         "%sAppArmorProfile: %s%s\n",
2366                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2367 }
2368
2369 void exec_status_start(ExecStatus *s, pid_t pid) {
2370         assert(s);
2371
2372         zero(*s);
2373         s->pid = pid;
2374         dual_timestamp_get(&s->start_timestamp);
2375 }
2376
2377 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2378         assert(s);
2379
2380         if (s->pid && s->pid != pid)
2381                 zero(*s);
2382
2383         s->pid = pid;
2384         dual_timestamp_get(&s->exit_timestamp);
2385
2386         s->code = code;
2387         s->status = status;
2388
2389         if (context) {
2390                 if (context->utmp_id)
2391                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2392
2393                 exec_context_tty_reset(context);
2394         }
2395 }
2396
2397 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2398         char buf[FORMAT_TIMESTAMP_MAX];
2399
2400         assert(s);
2401         assert(f);
2402
2403         if (s->pid <= 0)
2404                 return;
2405
2406         prefix = strempty(prefix);
2407
2408         fprintf(f,
2409                 "%sPID: "PID_FMT"\n",
2410                 prefix, s->pid);
2411
2412         if (s->start_timestamp.realtime > 0)
2413                 fprintf(f,
2414                         "%sStart Timestamp: %s\n",
2415                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2416
2417         if (s->exit_timestamp.realtime > 0)
2418                 fprintf(f,
2419                         "%sExit Timestamp: %s\n"
2420                         "%sExit Code: %s\n"
2421                         "%sExit Status: %i\n",
2422                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2423                         prefix, sigchld_code_to_string(s->code),
2424                         prefix, s->status);
2425 }
2426
2427 char *exec_command_line(char **argv) {
2428         size_t k;
2429         char *n, *p, **a;
2430         bool first = true;
2431
2432         assert(argv);
2433
2434         k = 1;
2435         STRV_FOREACH(a, argv)
2436                 k += strlen(*a)+3;
2437
2438         if (!(n = new(char, k)))
2439                 return NULL;
2440
2441         p = n;
2442         STRV_FOREACH(a, argv) {
2443
2444                 if (!first)
2445                         *(p++) = ' ';
2446                 else
2447                         first = false;
2448
2449                 if (strpbrk(*a, WHITESPACE)) {
2450                         *(p++) = '\'';
2451                         p = stpcpy(p, *a);
2452                         *(p++) = '\'';
2453                 } else
2454                         p = stpcpy(p, *a);
2455
2456         }
2457
2458         *p = 0;
2459
2460         /* FIXME: this doesn't really handle arguments that have
2461          * spaces and ticks in them */
2462
2463         return n;
2464 }
2465
2466 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2467         _cleanup_free_ char *cmd = NULL;
2468         const char *prefix2;
2469
2470         assert(c);
2471         assert(f);
2472
2473         prefix = strempty(prefix);
2474         prefix2 = strappenda(prefix, "\t");
2475
2476         cmd = exec_command_line(c->argv);
2477         fprintf(f,
2478                 "%sCommand Line: %s\n",
2479                 prefix, cmd ? cmd : strerror(ENOMEM));
2480
2481         exec_status_dump(&c->exec_status, f, prefix2);
2482 }
2483
2484 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2485         assert(f);
2486
2487         prefix = strempty(prefix);
2488
2489         LIST_FOREACH(command, c, c)
2490                 exec_command_dump(c, f, prefix);
2491 }
2492
2493 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2494         ExecCommand *end;
2495
2496         assert(l);
2497         assert(e);
2498
2499         if (*l) {
2500                 /* It's kind of important, that we keep the order here */
2501                 LIST_FIND_TAIL(command, *l, end);
2502                 LIST_INSERT_AFTER(command, *l, end, e);
2503         } else
2504               *l = e;
2505 }
2506
2507 int exec_command_set(ExecCommand *c, const char *path, ...) {
2508         va_list ap;
2509         char **l, *p;
2510
2511         assert(c);
2512         assert(path);
2513
2514         va_start(ap, path);
2515         l = strv_new_ap(path, ap);
2516         va_end(ap);
2517
2518         if (!l)
2519                 return -ENOMEM;
2520
2521         p = strdup(path);
2522         if (!p) {
2523                 strv_free(l);
2524                 return -ENOMEM;
2525         }
2526
2527         free(c->path);
2528         c->path = p;
2529
2530         strv_free(c->argv);
2531         c->argv = l;
2532
2533         return 0;
2534 }
2535
2536 static int exec_runtime_allocate(ExecRuntime **rt) {
2537
2538         if (*rt)
2539                 return 0;
2540
2541         *rt = new0(ExecRuntime, 1);
2542         if (!*rt)
2543                 return -ENOMEM;
2544
2545         (*rt)->n_ref = 1;
2546         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2547
2548         return 0;
2549 }
2550
2551 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2552         int r;
2553
2554         assert(rt);
2555         assert(c);
2556         assert(id);
2557
2558         if (*rt)
2559                 return 1;
2560
2561         if (!c->private_network && !c->private_tmp)
2562                 return 0;
2563
2564         r = exec_runtime_allocate(rt);
2565         if (r < 0)
2566                 return r;
2567
2568         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2569                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2570                         return -errno;
2571         }
2572
2573         if (c->private_tmp && !(*rt)->tmp_dir) {
2574                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2575                 if (r < 0)
2576                         return r;
2577         }
2578
2579         return 1;
2580 }
2581
2582 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2583         assert(r);
2584         assert(r->n_ref > 0);
2585
2586         r->n_ref++;
2587         return r;
2588 }
2589
2590 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2591
2592         if (!r)
2593                 return NULL;
2594
2595         assert(r->n_ref > 0);
2596
2597         r->n_ref--;
2598         if (r->n_ref <= 0) {
2599                 free(r->tmp_dir);
2600                 free(r->var_tmp_dir);
2601                 safe_close_pair(r->netns_storage_socket);
2602                 free(r);
2603         }
2604
2605         return NULL;
2606 }
2607
2608 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2609         assert(u);
2610         assert(f);
2611         assert(fds);
2612
2613         if (!rt)
2614                 return 0;
2615
2616         if (rt->tmp_dir)
2617                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2618
2619         if (rt->var_tmp_dir)
2620                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2621
2622         if (rt->netns_storage_socket[0] >= 0) {
2623                 int copy;
2624
2625                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2626                 if (copy < 0)
2627                         return copy;
2628
2629                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2630         }
2631
2632         if (rt->netns_storage_socket[1] >= 0) {
2633                 int copy;
2634
2635                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2636                 if (copy < 0)
2637                         return copy;
2638
2639                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2640         }
2641
2642         return 0;
2643 }
2644
2645 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2646         int r;
2647
2648         assert(rt);
2649         assert(key);
2650         assert(value);
2651
2652         if (streq(key, "tmp-dir")) {
2653                 char *copy;
2654
2655                 r = exec_runtime_allocate(rt);
2656                 if (r < 0)
2657                         return r;
2658
2659                 copy = strdup(value);
2660                 if (!copy)
2661                         return log_oom();
2662
2663                 free((*rt)->tmp_dir);
2664                 (*rt)->tmp_dir = copy;
2665
2666         } else if (streq(key, "var-tmp-dir")) {
2667                 char *copy;
2668
2669                 r = exec_runtime_allocate(rt);
2670                 if (r < 0)
2671                         return r;
2672
2673                 copy = strdup(value);
2674                 if (!copy)
2675                         return log_oom();
2676
2677                 free((*rt)->var_tmp_dir);
2678                 (*rt)->var_tmp_dir = copy;
2679
2680         } else if (streq(key, "netns-socket-0")) {
2681                 int fd;
2682
2683                 r = exec_runtime_allocate(rt);
2684                 if (r < 0)
2685                         return r;
2686
2687                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2688                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2689                 else {
2690                         safe_close((*rt)->netns_storage_socket[0]);
2691                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2692                 }
2693         } else if (streq(key, "netns-socket-1")) {
2694                 int fd;
2695
2696                 r = exec_runtime_allocate(rt);
2697                 if (r < 0)
2698                         return r;
2699
2700                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2701                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2702                 else {
2703                         safe_close((*rt)->netns_storage_socket[1]);
2704                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2705                 }
2706         } else
2707                 return 0;
2708
2709         return 1;
2710 }
2711
/* Job body handed to asynchronous_job(): runs rm_rf_dangerous() on the path
 * passed in p and then frees the path string, which this function owns.
 * Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2718
2719 void exec_runtime_destroy(ExecRuntime *rt) {
2720         int r;
2721
2722         if (!rt)
2723                 return;
2724
2725         /* If there are multiple users of this, let's leave the stuff around */
2726         if (rt->n_ref > 1)
2727                 return;
2728
2729         if (rt->tmp_dir) {
2730                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2731
2732                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2733                 if (r < 0) {
2734                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2735                         free(rt->tmp_dir);
2736                 }
2737
2738                 rt->tmp_dir = NULL;
2739         }
2740
2741         if (rt->var_tmp_dir) {
2742                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2743
2744                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2745                 if (r < 0) {
2746                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2747                         free(rt->var_tmp_dir);
2748                 }
2749
2750                 rt->var_tmp_dir = NULL;
2751         }
2752
2753         safe_close_pair(rt->netns_storage_socket);
2754 }
2755
2756 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2757         [EXEC_INPUT_NULL] = "null",
2758         [EXEC_INPUT_TTY] = "tty",
2759         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2760         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2761         [EXEC_INPUT_SOCKET] = "socket"
2762 };
2763
2764 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2765
2766 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2767         [EXEC_OUTPUT_INHERIT] = "inherit",
2768         [EXEC_OUTPUT_NULL] = "null",
2769         [EXEC_OUTPUT_TTY] = "tty",
2770         [EXEC_OUTPUT_SYSLOG] = "syslog",
2771         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2772         [EXEC_OUTPUT_KMSG] = "kmsg",
2773         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2774         [EXEC_OUTPUT_JOURNAL] = "journal",
2775         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2776         [EXEC_OUTPUT_SOCKET] = "socket"
2777 };
2778
2779 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);