chiark / gitweb /
util: replace close_nointr_nofail() by a more useful safe_close()
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "tcpwrap.h"
73 #include "exit-status.h"
74 #include "missing.h"
75 #include "utmp-wtmp.h"
76 #include "def.h"
77 #include "path-util.h"
78 #include "env-util.h"
79 #include "fileio.h"
80 #include "unit.h"
81 #include "async.h"
82 #include "selinux-util.h"
83 #include "errno-list.h"
84 #include "af-list.h"
85 #include "mkdir.h"
86 #include "apparmor-util.h"
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
93 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94
95 /* This assumes there is a 'tty' group */
96 #define TTY_MODE 0620
97
98 #define SNDBUF_SIZE (8*1024*1024)
99
/* Moves the passed file descriptors so that entry idx of fds[] ends up
 * at descriptor number idx+3. The entries of fds[] are rewritten in
 * place with the new descriptor numbers.
 *
 * Returns 0 on success, or a negative errno value if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted.
                         * If the wanted slot was still occupied, remember
                         * the first such index and run another pass from
                         * there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
143
/* Applies the requested O_NONBLOCK setting to every fd in fds[] and
 * clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative value returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be passed on to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
170
171 _pure_ static const char *tty_path(const ExecContext *context) {
172         assert(context);
173
174         if (context->tty_path)
175                 return context->tty_path;
176
177         return "/dev/console";
178 }
179
180 static void exec_context_tty_reset(const ExecContext *context) {
181         assert(context);
182
183         if (context->tty_vhangup)
184                 terminal_vhangup(tty_path(context));
185
186         if (context->tty_reset)
187                 reset_terminal(tty_path(context));
188
189         if (context->tty_vt_disallocate && context->tty_path)
190                 vt_disallocate(context->tty_path);
191 }
192
193 static bool is_terminal_output(ExecOutput o) {
194         return
195                 o == EXEC_OUTPUT_TTY ||
196                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
199 }
200
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the descriptor we wanted */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(null_fd);
        return ret;
}
218
219 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220         int fd, r;
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225
226         assert(context);
227         assert(output < _EXEC_OUTPUT_MAX);
228         assert(ident);
229         assert(nfd >= 0);
230
231         fd = socket(AF_UNIX, SOCK_STREAM, 0);
232         if (fd < 0)
233                 return -errno;
234
235         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
236         if (r < 0) {
237                 safe_close(fd);
238                 return -errno;
239         }
240
241         if (shutdown(fd, SHUT_RD) < 0) {
242                 safe_close(fd);
243                 return -errno;
244         }
245
246         fd_inc_sndbuf(fd, SNDBUF_SIZE);
247
248         dprintf(fd,
249                 "%s\n"
250                 "%s\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n",
256                 context->syslog_identifier ? context->syslog_identifier : ident,
257                 unit_id,
258                 context->syslog_priority,
259                 !!context->syslog_level_prefix,
260                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
261                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
262                 is_terminal_output(output));
263
264         if (fd != nfd) {
265                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
266                 safe_close(fd);
267         } else
268                 r = nfd;
269
270         return r;
271 }
/* Opens the terminal at path via open_terminal() (adding O_NOCTTY to
 * the passed mode) and makes it available as file descriptor nfd.
 *
 * Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested descriptor number */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(tty_fd);
        return ret;
}
289
290 static bool is_terminal_input(ExecInput i) {
291         return
292                 i == EXEC_INPUT_TTY ||
293                 i == EXEC_INPUT_TTY_FORCE ||
294                 i == EXEC_INPUT_TTY_FAIL;
295 }
296
297 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298
299         if (is_terminal_input(std_input) && !apply_tty_stdin)
300                 return EXEC_INPUT_NULL;
301
302         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
303                 return EXEC_INPUT_NULL;
304
305         return std_input;
306 }
307
308 static int fixup_output(ExecOutput std_output, int socket_fd) {
309
310         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
311                 return EXEC_OUTPUT_INHERIT;
312
313         return std_output;
314 }
315
316 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
317         ExecInput i;
318
319         assert(context);
320
321         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322
323         switch (i) {
324
325         case EXEC_INPUT_NULL:
326                 return open_null_as(O_RDONLY, STDIN_FILENO);
327
328         case EXEC_INPUT_TTY:
329         case EXEC_INPUT_TTY_FORCE:
330         case EXEC_INPUT_TTY_FAIL: {
331                 int fd, r;
332
333                 fd = acquire_terminal(tty_path(context),
334                                       i == EXEC_INPUT_TTY_FAIL,
335                                       i == EXEC_INPUT_TTY_FORCE,
336                                       false,
337                                       (usec_t) -1);
338                 if (fd < 0)
339                         return fd;
340
341                 if (fd != STDIN_FILENO) {
342                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
343                         safe_close(fd);
344                 } else
345                         r = STDIN_FILENO;
346
347                 return r;
348         }
349
350         case EXEC_INPUT_SOCKET:
351                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352
353         default:
354                 assert_not_reached("Unknown input type");
355         }
356 }
357
358 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
359         ExecOutput o;
360         ExecInput i;
361         int r;
362
363         assert(context);
364         assert(ident);
365
366         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367         o = fixup_output(context->std_output, socket_fd);
368
369         if (fileno == STDERR_FILENO) {
370                 ExecOutput e;
371                 e = fixup_output(context->std_error, socket_fd);
372
373                 /* This expects the input and output are already set up */
374
375                 /* Don't change the stderr file descriptor if we inherit all
376                  * the way and are not on a tty */
377                 if (e == EXEC_OUTPUT_INHERIT &&
378                     o == EXEC_OUTPUT_INHERIT &&
379                     i == EXEC_INPUT_NULL &&
380                     !is_terminal_input(context->std_input) &&
381                     getppid () != 1)
382                         return fileno;
383
384                 /* Duplicate from stdout if possible */
385                 if (e == o || e == EXEC_OUTPUT_INHERIT)
386                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
387
388                 o = e;
389
390         } else if (o == EXEC_OUTPUT_INHERIT) {
391                 /* If input got downgraded, inherit the original value */
392                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
393                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394
395                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
396                 if (i != EXEC_INPUT_NULL)
397                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398
399                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
400                 if (getppid() != 1)
401                         return fileno;
402
403                 /* We need to open /dev/null here anew, to get the right access mode. */
404                 return open_null_as(O_WRONLY, fileno);
405         }
406
407         switch (o) {
408
409         case EXEC_OUTPUT_NULL:
410                 return open_null_as(O_WRONLY, fileno);
411
412         case EXEC_OUTPUT_TTY:
413                 if (is_terminal_input(i))
414                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415
416                 /* We don't reset the terminal if this is just about output */
417                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418
419         case EXEC_OUTPUT_SYSLOG:
420         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421         case EXEC_OUTPUT_KMSG:
422         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423         case EXEC_OUTPUT_JOURNAL:
424         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425                 r = connect_logger_as(context, o, ident, unit_id, fileno);
426                 if (r < 0) {
427                         log_struct_unit(LOG_CRIT, unit_id,
428                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
429                                 fileno == STDOUT_FILENO ? "out" : "err",
430                                 unit_id, strerror(-r),
431                                 "ERRNO=%d", -r,
432                                 NULL);
433                         r = open_null_as(O_WRONLY, fileno);
434                 }
435                 return r;
436
437         case EXEC_OUTPUT_SOCKET:
438                 assert(socket_fd >= 0);
439                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
440
441         default:
442                 assert_not_reached("Unknown error type");
443         }
444 }
445
446 static int chown_terminal(int fd, uid_t uid) {
447         struct stat st;
448
449         assert(fd >= 0);
450
451         /* This might fail. What matters are the results. */
452         (void) fchown(fd, uid, -1);
453         (void) fchmod(fd, TTY_MODE);
454
455         if (fstat(fd, &st) < 0)
456                 return -errno;
457
458         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
459                 return -EPERM;
460
461         return 0;
462 }
463
464 static int setup_confirm_stdio(int *_saved_stdin,
465                                int *_saved_stdout) {
466         int fd = -1, saved_stdin, saved_stdout = -1, r;
467
468         assert(_saved_stdin);
469         assert(_saved_stdout);
470
471         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
472         if (saved_stdin < 0)
473                 return -errno;
474
475         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
476         if (saved_stdout < 0) {
477                 r = errno;
478                 goto fail;
479         }
480
481         fd = acquire_terminal(
482                         "/dev/console",
483                         false,
484                         false,
485                         false,
486                         DEFAULT_CONFIRM_USEC);
487         if (fd < 0) {
488                 r = fd;
489                 goto fail;
490         }
491
492         r = chown_terminal(fd, getuid());
493         if (r < 0)
494                 goto fail;
495
496         if (dup2(fd, STDIN_FILENO) < 0) {
497                 r = -errno;
498                 goto fail;
499         }
500
501         if (dup2(fd, STDOUT_FILENO) < 0) {
502                 r = -errno;
503                 goto fail;
504         }
505
506         if (fd >= 2)
507                 safe_close(fd);
508
509         *_saved_stdin = saved_stdin;
510         *_saved_stdout = saved_stdout;
511
512         return 0;
513
514 fail:
515         safe_close(saved_stdout);
516         safe_close(saved_stdin);
517         safe_close(fd);
518
519         return r;
520 }
521
522 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
523         _cleanup_close_ int fd = -1;
524         va_list ap;
525
526         assert(format);
527
528         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
529         if (fd < 0)
530                 return fd;
531
532         va_start(ap, format);
533         vdprintf(fd, format, ap);
534         va_end(ap);
535
536         return 0;
537 }
538
/* Releases the terminal and puts the previously saved descriptors back
 * onto stdin and stdout. Both saved descriptors are closed afterwards
 * in any case.
 *
 * Returns 0 on success, or the negative errno value of the last
 * dup2() call that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
562
/* Asks on the console whether the command given by argv shall be
 * executed. The answer character (one of "yns") is stored in
 * *response by ask().
 *
 * Returns the result of ask(), or a negative error value if the
 * console could not be set up or the command line could not be
 * allocated. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdin/stdout already point at the console here, so
                 * they have to be restored on this path too */
                r = -ENOMEM;
                goto restore;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

restore:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
582
583 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
584         bool keep_groups = false;
585         int r;
586
587         assert(context);
588
589         /* Lookup and set GID and supplementary group list. Here too
590          * we avoid NSS lookups for gid=0. */
591
592         if (context->group || username) {
593
594                 if (context->group) {
595                         const char *g = context->group;
596
597                         if ((r = get_group_creds(&g, &gid)) < 0)
598                                 return r;
599                 }
600
601                 /* First step, initialize groups from /etc/groups */
602                 if (username && gid != 0) {
603                         if (initgroups(username, gid) < 0)
604                                 return -errno;
605
606                         keep_groups = true;
607                 }
608
609                 /* Second step, set our gids */
610                 if (setresgid(gid, gid, gid) < 0)
611                         return -errno;
612         }
613
614         if (context->supplementary_groups) {
615                 int ngroups_max, k;
616                 gid_t *gids;
617                 char **i;
618
619                 /* Final step, initialize any manually set supplementary groups */
620                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
621
622                 if (!(gids = new(gid_t, ngroups_max)))
623                         return -ENOMEM;
624
625                 if (keep_groups) {
626                         if ((k = getgroups(ngroups_max, gids)) < 0) {
627                                 free(gids);
628                                 return -errno;
629                         }
630                 } else
631                         k = 0;
632
633                 STRV_FOREACH(i, context->supplementary_groups) {
634                         const char *g;
635
636                         if (k >= ngroups_max) {
637                                 free(gids);
638                                 return -E2BIG;
639                         }
640
641                         g = *i;
642                         r = get_group_creds(&g, gids+k);
643                         if (r < 0) {
644                                 free(gids);
645                                 return r;
646                         }
647
648                         k++;
649                 }
650
651                 if (setgroups(k, gids) < 0) {
652                         free(gids);
653                         return -errno;
654                 }
655
656                 free(gids);
657         }
658
659         return 0;
660 }
661
662 static int enforce_user(const ExecContext *context, uid_t uid) {
663         assert(context);
664
665         /* Sets (but doesn't lookup) the uid and make sure we keep the
666          * capabilities while doing so. */
667
668         if (context->capabilities) {
669                 _cleanup_cap_free_ cap_t d = NULL;
670                 static const cap_value_t bits[] = {
671                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
672                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
673                 };
674
675                 /* First step: If we need to keep capabilities but
676                  * drop privileges we need to make sure we keep our
677                  * caps, while we drop privileges. */
678                 if (uid != 0) {
679                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
680
681                         if (prctl(PR_GET_SECUREBITS) != sb)
682                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
683                                         return -errno;
684                 }
685
686                 /* Second step: set the capabilities. This will reduce
687                  * the capabilities to the minimum we need. */
688
689                 d = cap_dup(context->capabilities);
690                 if (!d)
691                         return -errno;
692
693                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
694                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
695                         return -errno;
696
697                 if (cap_set_proc(d) < 0)
698                         return -errno;
699         }
700
701         /* Third step: actually set the uids */
702         if (setresuid(uid, uid, uid) < 0)
703                 return -errno;
704
705         /* At this point we should have all necessary capabilities but
706            are otherwise a normal user. However, the caps might got
707            corrupted due to the setresuid() so we need clean them up
708            later. This is done outside of this call. */
709
710         return 0;
711 }
712
713 #ifdef HAVE_PAM
714
715 static int null_conv(
716                 int num_msg,
717                 const struct pam_message **msg,
718                 struct pam_response **resp,
719                 void *appdata_ptr) {
720
721         /* We don't support conversations */
722
723         return PAM_CONV_ERR;
724 }
725
726 static int setup_pam(
727                 const char *name,
728                 const char *user,
729                 uid_t uid,
730                 const char *tty,
731                 char ***pam_env,
732                 int fds[], unsigned n_fds) {
733
734         static const struct pam_conv conv = {
735                 .conv = null_conv,
736                 .appdata_ptr = NULL
737         };
738
739         pam_handle_t *handle = NULL;
740         sigset_t ss, old_ss;
741         int pam_code = PAM_SUCCESS;
742         int err;
743         char **e = NULL;
744         bool close_session = false;
745         pid_t pam_pid = 0, parent_pid;
746         int flags = 0;
747
748         assert(name);
749         assert(user);
750         assert(pam_env);
751
752         /* We set up PAM in the parent process, then fork. The child
753          * will then stay around until killed via PR_GET_PDEATHSIG or
754          * systemd via the cgroup logic. It will then remove the PAM
755          * session again. The parent process will exec() the actual
756          * daemon. We do things this way to ensure that the main PID
757          * of the daemon is the one we initially fork()ed. */
758
759         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
760                 flags |= PAM_SILENT;
761
762         pam_code = pam_start(name, user, &conv, &handle);
763         if (pam_code != PAM_SUCCESS) {
764                 handle = NULL;
765                 goto fail;
766         }
767
768         if (tty) {
769                 pam_code = pam_set_item(handle, PAM_TTY, tty);
770                 if (pam_code != PAM_SUCCESS)
771                         goto fail;
772         }
773
774         pam_code = pam_acct_mgmt(handle, flags);
775         if (pam_code != PAM_SUCCESS)
776                 goto fail;
777
778         pam_code = pam_open_session(handle, flags);
779         if (pam_code != PAM_SUCCESS)
780                 goto fail;
781
782         close_session = true;
783
784         e = pam_getenvlist(handle);
785         if (!e) {
786                 pam_code = PAM_BUF_ERR;
787                 goto fail;
788         }
789
790         /* Block SIGTERM, so that we know that it won't get lost in
791          * the child */
792         if (sigemptyset(&ss) < 0 ||
793             sigaddset(&ss, SIGTERM) < 0 ||
794             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
795                 goto fail;
796
797         parent_pid = getpid();
798
799         pam_pid = fork();
800         if (pam_pid < 0)
801                 goto fail;
802
803         if (pam_pid == 0) {
804                 int sig;
805                 int r = EXIT_PAM;
806
807                 /* The child's job is to reset the PAM session on
808                  * termination */
809
810                 /* This string must fit in 10 chars (i.e. the length
811                  * of "/sbin/init"), to look pretty in /bin/ps */
812                 rename_process("(sd-pam)");
813
814                 /* Make sure we don't keep open the passed fds in this
815                 child. We assume that otherwise only those fds are
816                 open here that have been opened by PAM. */
817                 close_many(fds, n_fds);
818
819                 /* Drop privileges - we don't need any to pam_close_session
820                  * and this will make PR_SET_PDEATHSIG work in most cases.
821                  * If this fails, ignore the error - but expect sd-pam threads
822                  * to fail to exit normally */
823                 if (setresuid(uid, uid, uid) < 0)
824                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
825
826                 /* Wait until our parent died. This will only work if
827                  * the above setresuid() succeeds, otherwise the kernel
828                  * will not allow unprivileged parents kill their privileged
829                  * children this way. We rely on the control groups kill logic
830                  * to do the rest for us. */
831                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
832                         goto child_finish;
833
834                 /* Check if our parent process might already have
835                  * died? */
836                 if (getppid() == parent_pid) {
837                         for (;;) {
838                                 if (sigwait(&ss, &sig) < 0) {
839                                         if (errno == EINTR)
840                                                 continue;
841
842                                         goto child_finish;
843                                 }
844
845                                 assert(sig == SIGTERM);
846                                 break;
847                         }
848                 }
849
850                 /* If our parent died we'll end the session */
851                 if (getppid() != parent_pid) {
852                         pam_code = pam_close_session(handle, flags);
853                         if (pam_code != PAM_SUCCESS)
854                                 goto child_finish;
855                 }
856
857                 r = 0;
858
859         child_finish:
860                 pam_end(handle, pam_code | flags);
861                 _exit(r);
862         }
863
864         /* If the child was forked off successfully it will do all the
865          * cleanups, so forget about the handle here. */
866         handle = NULL;
867
868         /* Unblock SIGTERM again in the parent */
869         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
870                 goto fail;
871
872         /* We close the log explicitly here, since the PAM modules
873          * might have opened it, but we don't want this fd around. */
874         closelog();
875
876         *pam_env = e;
877         e = NULL;
878
879         return 0;
880
881 fail:
882         if (pam_code != PAM_SUCCESS) {
883                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
884                 err = -EPERM;  /* PAM errors do not map to errno */
885         } else {
886                 log_error("PAM failed: %m");
887                 err = -errno;
888         }
889
890         if (handle) {
891                 if (close_session)
892                         pam_code = pam_close_session(handle, flags);
893
894                 pam_end(handle, pam_code | flags);
895         }
896
897         strv_free(e);
898
899         closelog();
900
901         if (pam_pid > 1) {
902                 kill(pam_pid, SIGTERM);
903                 kill(pam_pid, SIGCONT);
904         }
905
906         return err;
907 }
908 #endif
909
/* Renames the current process to "(<name>)", where <name> is the last
 * path component of 'path', shortened to its final 8 characters if it
 * is longer. An empty name results in "(...)". */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        /* The resulting string has to fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting
                 * part, as the beginning might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        /* '(' + at most 8 chars + ')' + NUL fits the 11 byte buffer */
        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = 0;

        rename_process(name);
}
940
941 #ifdef HAVE_SECCOMP
942
943 static int apply_seccomp(ExecContext *c) {
944         uint32_t negative_action, action;
945         scmp_filter_ctx *seccomp;
946         Iterator i;
947         void *id;
948         int r;
949
950         assert(c);
951
952         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
953
954         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
955         if (!seccomp)
956                 return -ENOMEM;
957
958         if (c->syscall_archs) {
959
960                 SET_FOREACH(id, c->syscall_archs, i) {
961                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
962                         if (r == -EEXIST)
963                                 continue;
964                         if (r < 0)
965                                 goto finish;
966                 }
967
968         } else {
969                 r = seccomp_add_secondary_archs(seccomp);
970                 if (r < 0)
971                         goto finish;
972         }
973
974         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
975         SET_FOREACH(id, c->syscall_filter, i) {
976                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
977                 if (r < 0)
978                         goto finish;
979         }
980
981         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
982         if (r < 0)
983                 goto finish;
984
985         r = seccomp_load(seccomp);
986
987 finish:
988         seccomp_release(seccomp);
989         return r;
990 }
991
992 static int apply_address_families(ExecContext *c) {
993         scmp_filter_ctx *seccomp;
994         Iterator i;
995         int r;
996
997         assert(c);
998
999         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1000         if (!seccomp)
1001                 return -ENOMEM;
1002
1003         r = seccomp_add_secondary_archs(seccomp);
1004         if (r < 0)
1005                 goto finish;
1006
1007         if (c->address_families_whitelist) {
1008                 int af, first = 0, last = 0;
1009                 void *afp;
1010
1011                 /* If this is a whitelist, we first block the address
1012                  * families that are out of range and then everything
1013                  * that is not in the set. First, we find the lowest
1014                  * and highest address family in the set. */
1015
1016                 SET_FOREACH(afp, c->address_families, i) {
1017                         af = PTR_TO_INT(afp);
1018
1019                         if (af <= 0 || af >= af_max())
1020                                 continue;
1021
1022                         if (first == 0 || af < first)
1023                                 first = af;
1024
1025                         if (last == 0 || af > last)
1026                                 last = af;
1027                 }
1028
1029                 assert((first == 0) == (last == 0));
1030
1031                 if (first == 0) {
1032
1033                         /* No entries in the valid range, block everything */
1034                         r = seccomp_rule_add(
1035                                         seccomp,
1036                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1037                                         SCMP_SYS(socket),
1038                                         0);
1039                         if (r < 0)
1040                                 goto finish;
1041
1042                 } else {
1043
1044                         /* Block everything below the first entry */
1045                         r = seccomp_rule_add(
1046                                         seccomp,
1047                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1048                                         SCMP_SYS(socket),
1049                                         1,
1050                                         SCMP_A0(SCMP_CMP_LT, first));
1051                         if (r < 0)
1052                                 goto finish;
1053
1054                         /* Block everything above the last entry */
1055                         r = seccomp_rule_add(
1056                                         seccomp,
1057                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1058                                         SCMP_SYS(socket),
1059                                         1,
1060                                         SCMP_A0(SCMP_CMP_GT, last));
1061                         if (r < 0)
1062                                 goto finish;
1063
1064                         /* Block everything between the first and last
1065                          * entry */
1066                         for (af = 1; af < af_max(); af++) {
1067
1068                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1069                                         continue;
1070
1071                                 r = seccomp_rule_add(
1072                                                 seccomp,
1073                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1074                                                 SCMP_SYS(socket),
1075                                                 1,
1076                                                 SCMP_A0(SCMP_CMP_EQ, af));
1077                                 if (r < 0)
1078                                         goto finish;
1079                         }
1080                 }
1081
1082         } else {
1083                 void *af;
1084
1085                 /* If this is a blacklist, then generate one rule for
1086                  * each address family that are then combined in OR
1087                  * checks. */
1088
1089                 SET_FOREACH(af, c->address_families, i) {
1090
1091                         r = seccomp_rule_add(
1092                                         seccomp,
1093                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1094                                         SCMP_SYS(socket),
1095                                         1,
1096                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1097                         if (r < 0)
1098                                 goto finish;
1099                 }
1100         }
1101
1102         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1103         if (r < 0)
1104                 goto finish;
1105
1106         r = seccomp_load(seccomp);
1107
1108 finish:
1109         seccomp_release(seccomp);
1110         return r;
1111 }
1112
1113 #endif
1114
1115 static void do_idle_pipe_dance(int idle_pipe[4]) {
1116         assert(idle_pipe);
1117
1118
1119         safe_close(idle_pipe[1]);
1120         safe_close(idle_pipe[2]);
1121
1122         if (idle_pipe[0] >= 0) {
1123                 int r;
1124
1125                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1126
1127                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1128                         /* Signal systemd that we are bored and want to continue. */
1129                         write(idle_pipe[3], "x", 1);
1130
1131                         /* Wait for systemd to react to the signal above. */
1132                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1133                 }
1134
1135                 safe_close(idle_pipe[0]);
1136
1137         }
1138
1139         safe_close(idle_pipe[3]);
1140 }
1141
1142 static int build_environment(
1143                 ExecContext *c,
1144                 unsigned n_fds,
1145                 usec_t watchdog_usec,
1146                 const char *home,
1147                 const char *username,
1148                 const char *shell,
1149                 char ***ret) {
1150
1151         _cleanup_strv_free_ char **our_env = NULL;
1152         unsigned n_env = 0;
1153         char *x;
1154
1155         assert(c);
1156         assert(ret);
1157
1158         our_env = new0(char*, 10);
1159         if (!our_env)
1160                 return -ENOMEM;
1161
1162         if (n_fds > 0) {
1163                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1164                         return -ENOMEM;
1165                 our_env[n_env++] = x;
1166
1167                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1168                         return -ENOMEM;
1169                 our_env[n_env++] = x;
1170         }
1171
1172         if (watchdog_usec > 0) {
1173                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1174                         return -ENOMEM;
1175                 our_env[n_env++] = x;
1176
1177                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1178                         return -ENOMEM;
1179                 our_env[n_env++] = x;
1180         }
1181
1182         if (home) {
1183                 x = strappend("HOME=", home);
1184                 if (!x)
1185                         return -ENOMEM;
1186                 our_env[n_env++] = x;
1187         }
1188
1189         if (username) {
1190                 x = strappend("LOGNAME=", username);
1191                 if (!x)
1192                         return -ENOMEM;
1193                 our_env[n_env++] = x;
1194
1195                 x = strappend("USER=", username);
1196                 if (!x)
1197                         return -ENOMEM;
1198                 our_env[n_env++] = x;
1199         }
1200
1201         if (shell) {
1202                 x = strappend("SHELL=", shell);
1203                 if (!x)
1204                         return -ENOMEM;
1205                 our_env[n_env++] = x;
1206         }
1207
1208         if (is_terminal_input(c->std_input) ||
1209             c->std_output == EXEC_OUTPUT_TTY ||
1210             c->std_error == EXEC_OUTPUT_TTY ||
1211             c->tty_path) {
1212
1213                 x = strdup(default_term_for_tty(tty_path(c)));
1214                 if (!x)
1215                         return -ENOMEM;
1216                 our_env[n_env++] = x;
1217         }
1218
1219         our_env[n_env++] = NULL;
1220         assert(n_env <= 10);
1221
1222         *ret = our_env;
1223         our_env = NULL;
1224
1225         return 0;
1226 }
1227
1228 int exec_spawn(ExecCommand *command,
1229                char **argv,
1230                ExecContext *context,
1231                int fds[], unsigned n_fds,
1232                char **environment,
1233                bool apply_permissions,
1234                bool apply_chroot,
1235                bool apply_tty_stdin,
1236                bool confirm_spawn,
1237                CGroupControllerMask cgroup_supported,
1238                const char *cgroup_path,
1239                const char *runtime_prefix,
1240                const char *unit_id,
1241                usec_t watchdog_usec,
1242                int idle_pipe[4],
1243                ExecRuntime *runtime,
1244                pid_t *ret) {
1245
1246         _cleanup_strv_free_ char **files_env = NULL;
1247         int socket_fd;
1248         char *line;
1249         pid_t pid;
1250         int r;
1251
1252         assert(command);
1253         assert(context);
1254         assert(ret);
1255         assert(fds || n_fds <= 0);
1256
1257         if (context->std_input == EXEC_INPUT_SOCKET ||
1258             context->std_output == EXEC_OUTPUT_SOCKET ||
1259             context->std_error == EXEC_OUTPUT_SOCKET) {
1260
1261                 if (n_fds != 1)
1262                         return -EINVAL;
1263
1264                 socket_fd = fds[0];
1265
1266                 fds = NULL;
1267                 n_fds = 0;
1268         } else
1269                 socket_fd = -1;
1270
1271         r = exec_context_load_environment(context, &files_env);
1272         if (r < 0) {
1273                 log_struct_unit(LOG_ERR,
1274                            unit_id,
1275                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1276                            "ERRNO=%d", -r,
1277                            NULL);
1278                 return r;
1279         }
1280
1281         if (!argv)
1282                 argv = command->argv;
1283
1284         line = exec_command_line(argv);
1285         if (!line)
1286                 return log_oom();
1287
1288         log_struct_unit(LOG_DEBUG,
1289                         unit_id,
1290                         "EXECUTABLE=%s", command->path,
1291                         "MESSAGE=About to execute: %s", line,
1292                         NULL);
1293         free(line);
1294
1295         pid = fork();
1296         if (pid < 0)
1297                 return -errno;
1298
1299         if (pid == 0) {
1300                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1301                 const char *username = NULL, *home = NULL, *shell = NULL;
1302                 unsigned n_dont_close = 0;
1303                 int dont_close[n_fds + 3];
1304                 uid_t uid = (uid_t) -1;
1305                 gid_t gid = (gid_t) -1;
1306                 sigset_t ss;
1307                 int i, err;
1308
1309                 /* child */
1310
1311                 rename_process_from_path(command->path);
1312
1313                 /* We reset exactly these signals, since they are the
1314                  * only ones we set to SIG_IGN in the main daemon. All
1315                  * others we leave untouched because we set them to
1316                  * SIG_DFL or a valid handler initially, both of which
1317                  * will be demoted to SIG_DFL. */
1318                 default_signals(SIGNALS_CRASH_HANDLER,
1319                                 SIGNALS_IGNORE, -1);
1320
1321                 if (context->ignore_sigpipe)
1322                         ignore_signals(SIGPIPE, -1);
1323
1324                 assert_se(sigemptyset(&ss) == 0);
1325                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1326                         err = -errno;
1327                         r = EXIT_SIGNAL_MASK;
1328                         goto fail_child;
1329                 }
1330
1331                 if (idle_pipe)
1332                         do_idle_pipe_dance(idle_pipe);
1333
1334                 /* Close sockets very early to make sure we don't
1335                  * block init reexecution because it cannot bind its
1336                  * sockets */
1337                 log_forget_fds();
1338
1339                 if (socket_fd >= 0)
1340                         dont_close[n_dont_close++] = socket_fd;
1341                 if (n_fds > 0) {
1342                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1343                         n_dont_close += n_fds;
1344                 }
1345                 if (runtime) {
1346                         if (runtime->netns_storage_socket[0] >= 0)
1347                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1348                         if (runtime->netns_storage_socket[1] >= 0)
1349                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1350                 }
1351
1352                 err = close_all_fds(dont_close, n_dont_close);
1353                 if (err < 0) {
1354                         r = EXIT_FDS;
1355                         goto fail_child;
1356                 }
1357
1358                 if (!context->same_pgrp)
1359                         if (setsid() < 0) {
1360                                 err = -errno;
1361                                 r = EXIT_SETSID;
1362                                 goto fail_child;
1363                         }
1364
1365                 if (context->tcpwrap_name) {
1366                         if (socket_fd >= 0)
1367                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1368                                         err = -EACCES;
1369                                         r = EXIT_TCPWRAP;
1370                                         goto fail_child;
1371                                 }
1372
1373                         for (i = 0; i < (int) n_fds; i++) {
1374                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1375                                         err = -EACCES;
1376                                         r = EXIT_TCPWRAP;
1377                                         goto fail_child;
1378                                 }
1379                         }
1380                 }
1381
1382                 exec_context_tty_reset(context);
1383
1384                 if (confirm_spawn) {
1385                         char response;
1386
1387                         err = ask_for_confirmation(&response, argv);
1388                         if (err == -ETIMEDOUT)
1389                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1390                         else if (err < 0)
1391                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1392                         else if (response == 's') {
1393                                 write_confirm_message("Skipping execution.\n");
1394                                 err = -ECANCELED;
1395                                 r = EXIT_CONFIRM;
1396                                 goto fail_child;
1397                         } else if (response == 'n') {
1398                                 write_confirm_message("Failing execution.\n");
1399                                 err = r = 0;
1400                                 goto fail_child;
1401                         }
1402                 }
1403
1404                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1405                  * must sure to drop O_NONBLOCK */
1406                 if (socket_fd >= 0)
1407                         fd_nonblock(socket_fd, false);
1408
1409                 err = setup_input(context, socket_fd, apply_tty_stdin);
1410                 if (err < 0) {
1411                         r = EXIT_STDIN;
1412                         goto fail_child;
1413                 }
1414
1415                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1416                 if (err < 0) {
1417                         r = EXIT_STDOUT;
1418                         goto fail_child;
1419                 }
1420
1421                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1422                 if (err < 0) {
1423                         r = EXIT_STDERR;
1424                         goto fail_child;
1425                 }
1426
1427                 if (cgroup_path) {
1428                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1429                         if (err < 0) {
1430                                 r = EXIT_CGROUP;
1431                                 goto fail_child;
1432                         }
1433                 }
1434
1435                 if (context->oom_score_adjust_set) {
1436                         char t[16];
1437
1438                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1439                         char_array_0(t);
1440
1441                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1442                                 err = -errno;
1443                                 r = EXIT_OOM_ADJUST;
1444                                 goto fail_child;
1445                         }
1446                 }
1447
1448                 if (context->nice_set)
1449                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1450                                 err = -errno;
1451                                 r = EXIT_NICE;
1452                                 goto fail_child;
1453                         }
1454
1455                 if (context->cpu_sched_set) {
1456                         struct sched_param param = {
1457                                 .sched_priority = context->cpu_sched_priority,
1458                         };
1459
1460                         r = sched_setscheduler(0,
1461                                                context->cpu_sched_policy |
1462                                                (context->cpu_sched_reset_on_fork ?
1463                                                 SCHED_RESET_ON_FORK : 0),
1464                                                &param);
1465                         if (r < 0) {
1466                                 err = -errno;
1467                                 r = EXIT_SETSCHEDULER;
1468                                 goto fail_child;
1469                         }
1470                 }
1471
1472                 if (context->cpuset)
1473                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1474                                 err = -errno;
1475                                 r = EXIT_CPUAFFINITY;
1476                                 goto fail_child;
1477                         }
1478
1479                 if (context->ioprio_set)
1480                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1481                                 err = -errno;
1482                                 r = EXIT_IOPRIO;
1483                                 goto fail_child;
1484                         }
1485
1486                 if (context->timer_slack_nsec != (nsec_t) -1)
1487                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1488                                 err = -errno;
1489                                 r = EXIT_TIMERSLACK;
1490                                 goto fail_child;
1491                         }
1492
1493                 if (context->personality != 0xffffffffUL)
1494                         if (personality(context->personality) < 0) {
1495                                 err = -errno;
1496                                 r = EXIT_PERSONALITY;
1497                                 goto fail_child;
1498                         }
1499
1500                 if (context->utmp_id)
1501                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1502
1503                 if (context->user) {
1504                         username = context->user;
1505                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1506                         if (err < 0) {
1507                                 r = EXIT_USER;
1508                                 goto fail_child;
1509                         }
1510
1511                         if (is_terminal_input(context->std_input)) {
1512                                 err = chown_terminal(STDIN_FILENO, uid);
1513                                 if (err < 0) {
1514                                         r = EXIT_STDIN;
1515                                         goto fail_child;
1516                                 }
1517                         }
1518                 }
1519
1520 #ifdef HAVE_PAM
1521                 if (cgroup_path && context->user && context->pam_name) {
1522                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1523                         if (err < 0) {
1524                                 r = EXIT_CGROUP;
1525                                 goto fail_child;
1526                         }
1527
1528
1529                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1530                         if (err < 0) {
1531                                 r = EXIT_CGROUP;
1532                                 goto fail_child;
1533                         }
1534                 }
1535 #endif
1536
1537                 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1538                         char **rt;
1539
1540                         STRV_FOREACH(rt, context->runtime_directory) {
1541                                 _cleanup_free_ char *p;
1542
1543                                 p = strjoin(runtime_prefix, "/", *rt, NULL);
1544                                 if (!p) {
1545                                         r = EXIT_RUNTIME_DIRECTORY;
1546                                         err = -ENOMEM;
1547                                         goto fail_child;
1548                                 }
1549
1550                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1551                                 if (err < 0) {
1552                                         r = EXIT_RUNTIME_DIRECTORY;
1553                                         goto fail_child;
1554                                 }
1555                         }
1556                 }
1557
1558                 if (apply_permissions) {
1559                         err = enforce_groups(context, username, gid);
1560                         if (err < 0) {
1561                                 r = EXIT_GROUP;
1562                                 goto fail_child;
1563                         }
1564                 }
1565
1566                 umask(context->umask);
1567
1568 #ifdef HAVE_PAM
1569                 if (apply_permissions && context->pam_name && username) {
1570                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1571                         if (err < 0) {
1572                                 r = EXIT_PAM;
1573                                 goto fail_child;
1574                         }
1575                 }
1576 #endif
1577                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1578                         err = setup_netns(runtime->netns_storage_socket);
1579                         if (err < 0) {
1580                                 r = EXIT_NETWORK;
1581                                 goto fail_child;
1582                         }
1583                 }
1584
1585                 if (!strv_isempty(context->read_write_dirs) ||
1586                     !strv_isempty(context->read_only_dirs) ||
1587                     !strv_isempty(context->inaccessible_dirs) ||
1588                     context->mount_flags != 0 ||
1589                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1590                     context->private_devices) {
1591
1592                         char *tmp = NULL, *var = NULL;
1593
1594                         /* The runtime struct only contains the parent
1595                          * of the private /tmp, which is
1596                          * non-accessible to world users. Inside of it
1597                          * there's a /tmp that is sticky, and that's
1598                          * the one we want to use here. */
1599
1600                         if (context->private_tmp && runtime) {
1601                                 if (runtime->tmp_dir)
1602                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1603                                 if (runtime->var_tmp_dir)
1604                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1605                         }
1606
1607                         err = setup_namespace(
1608                                         context->read_write_dirs,
1609                                         context->read_only_dirs,
1610                                         context->inaccessible_dirs,
1611                                         tmp,
1612                                         var,
1613                                         context->private_devices,
1614                                         context->mount_flags);
1615
1616                         if (err < 0) {
1617                                 r = EXIT_NAMESPACE;
1618                                 goto fail_child;
1619                         }
1620                 }
1621
1622                 if (apply_chroot) {
1623                         if (context->root_directory)
1624                                 if (chroot(context->root_directory) < 0) {
1625                                         err = -errno;
1626                                         r = EXIT_CHROOT;
1627                                         goto fail_child;
1628                                 }
1629
1630                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1631                                 err = -errno;
1632                                 r = EXIT_CHDIR;
1633                                 goto fail_child;
1634                         }
1635                 } else {
1636                         _cleanup_free_ char *d = NULL;
1637
1638                         if (asprintf(&d, "%s/%s",
1639                                      context->root_directory ? context->root_directory : "",
1640                                      context->working_directory ? context->working_directory : "") < 0) {
1641                                 err = -ENOMEM;
1642                                 r = EXIT_MEMORY;
1643                                 goto fail_child;
1644                         }
1645
1646                         if (chdir(d) < 0) {
1647                                 err = -errno;
1648                                 r = EXIT_CHDIR;
1649                                 goto fail_child;
1650                         }
1651                 }
1652
1653                 /* We repeat the fd closing here, to make sure that
1654                  * nothing is leaked from the PAM modules */
1655                 err = close_all_fds(fds, n_fds);
1656                 if (err >= 0)
1657                         err = shift_fds(fds, n_fds);
1658                 if (err >= 0)
1659                         err = flags_fds(fds, n_fds, context->non_blocking);
1660                 if (err < 0) {
1661                         r = EXIT_FDS;
1662                         goto fail_child;
1663                 }
1664
1665                 if (apply_permissions) {
1666
1667                         for (i = 0; i < _RLIMIT_MAX; i++) {
1668                                 if (!context->rlimit[i])
1669                                         continue;
1670
1671                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1672                                         err = -errno;
1673                                         r = EXIT_LIMITS;
1674                                         goto fail_child;
1675                                 }
1676                         }
1677
1678                         if (context->capability_bounding_set_drop) {
1679                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1680                                 if (err < 0) {
1681                                         r = EXIT_CAPABILITIES;
1682                                         goto fail_child;
1683                                 }
1684                         }
1685
1686                         if (context->user) {
1687                                 err = enforce_user(context, uid);
1688                                 if (err < 0) {
1689                                         r = EXIT_USER;
1690                                         goto fail_child;
1691                                 }
1692                         }
1693
1694                         /* PR_GET_SECUREBITS is not privileged, while
1695                          * PR_SET_SECUREBITS is. So to suppress
1696                          * potential EPERMs we'll try not to call
1697                          * PR_SET_SECUREBITS unless necessary. */
1698                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1699                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1700                                         err = -errno;
1701                                         r = EXIT_SECUREBITS;
1702                                         goto fail_child;
1703                                 }
1704
1705                         if (context->capabilities)
1706                                 if (cap_set_proc(context->capabilities) < 0) {
1707                                         err = -errno;
1708                                         r = EXIT_CAPABILITIES;
1709                                         goto fail_child;
1710                                 }
1711
1712                         if (context->no_new_privileges)
1713                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1714                                         err = -errno;
1715                                         r = EXIT_NO_NEW_PRIVILEGES;
1716                                         goto fail_child;
1717                                 }
1718
1719 #ifdef HAVE_SECCOMP
1720                         if (context->address_families_whitelist ||
1721                             !set_isempty(context->address_families)) {
1722                                 err = apply_address_families(context);
1723                                 if (err < 0) {
1724                                         r = EXIT_ADDRESS_FAMILIES;
1725                                         goto fail_child;
1726                                 }
1727                         }
1728
1729                         if (context->syscall_whitelist ||
1730                             !set_isempty(context->syscall_filter) ||
1731                             !set_isempty(context->syscall_archs)) {
1732                                 err = apply_seccomp(context);
1733                                 if (err < 0) {
1734                                         r = EXIT_SECCOMP;
1735                                         goto fail_child;
1736                                 }
1737                         }
1738 #endif
1739
1740 #ifdef HAVE_SELINUX
1741                         if (context->selinux_context && use_selinux()) {
1742                                 err = setexeccon(context->selinux_context);
1743                                 if (err < 0 && !context->selinux_context_ignore) {
1744                                         r = EXIT_SELINUX_CONTEXT;
1745                                         goto fail_child;
1746                                 }
1747                         }
1748 #endif
1749
1750 #ifdef HAVE_APPARMOR
1751                         if (context->apparmor_profile && use_apparmor()) {
1752                                 err = aa_change_onexec(context->apparmor_profile);
1753                                 if (err < 0 && !context->apparmor_profile_ignore) {
1754                                         r = EXIT_APPARMOR_PROFILE;
1755                                         goto fail_child;
1756                                 }
1757                         }
1758 #endif
1759                 }
1760
1761                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1762                 if (r < 0) {
1763                         r = EXIT_MEMORY;
1764                         goto fail_child;
1765                 }
1766
1767                 final_env = strv_env_merge(5,
1768                                            environment,
1769                                            our_env,
1770                                            context->environment,
1771                                            files_env,
1772                                            pam_env,
1773                                            NULL);
1774                 if (!final_env) {
1775                         err = -ENOMEM;
1776                         r = EXIT_MEMORY;
1777                         goto fail_child;
1778                 }
1779
1780                 final_argv = replace_env_argv(argv, final_env);
1781                 if (!final_argv) {
1782                         err = -ENOMEM;
1783                         r = EXIT_MEMORY;
1784                         goto fail_child;
1785                 }
1786
1787                 final_env = strv_env_clean(final_env);
1788
1789                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1790                         line = exec_command_line(final_argv);
1791                         if (line) {
1792                                 log_open();
1793                                 log_struct_unit(LOG_DEBUG,
1794                                                 unit_id,
1795                                                 "EXECUTABLE=%s", command->path,
1796                                                 "MESSAGE=Executing: %s", line,
1797                                                 NULL);
1798                                 log_close();
1799                                 free(line);
1800                                 line = NULL;
1801                         }
1802                 }
1803                 execve(command->path, final_argv, final_env);
1804                 err = -errno;
1805                 r = EXIT_EXEC;
1806
1807         fail_child:
1808                 if (r != 0) {
1809                         log_open();
1810                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1811                                    "EXECUTABLE=%s", command->path,
1812                                    "MESSAGE=Failed at step %s spawning %s: %s",
1813                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1814                                           command->path, strerror(-err),
1815                                    "ERRNO=%d", -err,
1816                                    NULL);
1817                         log_close();
1818                 }
1819
1820                 _exit(r);
1821         }
1822
1823         log_struct_unit(LOG_DEBUG,
1824                         unit_id,
1825                         "MESSAGE=Forked %s as "PID_FMT,
1826                         command->path, pid,
1827                         NULL);
1828
1829         /* We add the new process to the cgroup both in the child (so
1830          * that we can be sure that no user code is ever executed
1831          * outside of the cgroup) and in the parent (so that we can be
1832          * sure that when we kill the cgroup the process will be
1833          * killed too). */
1834         if (cgroup_path)
1835                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1836
1837         exec_status_start(&command->exec_status, pid);
1838
1839         *ret = pid;
1840         return 0;
1841 }
1842
1843 void exec_context_init(ExecContext *c) {
1844         assert(c);
1845
1846         c->umask = 0022;
1847         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1848         c->cpu_sched_policy = SCHED_OTHER;
1849         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1850         c->syslog_level_prefix = true;
1851         c->ignore_sigpipe = true;
1852         c->timer_slack_nsec = (nsec_t) -1;
1853         c->personality = 0xffffffffUL;
1854         c->runtime_directory_mode = 0755;
1855 }
1856
1857 void exec_context_done(ExecContext *c) {
1858         unsigned l;
1859
1860         assert(c);
1861
1862         strv_free(c->environment);
1863         c->environment = NULL;
1864
1865         strv_free(c->environment_files);
1866         c->environment_files = NULL;
1867
1868         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1869                 free(c->rlimit[l]);
1870                 c->rlimit[l] = NULL;
1871         }
1872
1873         free(c->working_directory);
1874         c->working_directory = NULL;
1875         free(c->root_directory);
1876         c->root_directory = NULL;
1877
1878         free(c->tty_path);
1879         c->tty_path = NULL;
1880
1881         free(c->tcpwrap_name);
1882         c->tcpwrap_name = NULL;
1883
1884         free(c->syslog_identifier);
1885         c->syslog_identifier = NULL;
1886
1887         free(c->user);
1888         c->user = NULL;
1889
1890         free(c->group);
1891         c->group = NULL;
1892
1893         strv_free(c->supplementary_groups);
1894         c->supplementary_groups = NULL;
1895
1896         free(c->pam_name);
1897         c->pam_name = NULL;
1898
1899         if (c->capabilities) {
1900                 cap_free(c->capabilities);
1901                 c->capabilities = NULL;
1902         }
1903
1904         strv_free(c->read_only_dirs);
1905         c->read_only_dirs = NULL;
1906
1907         strv_free(c->read_write_dirs);
1908         c->read_write_dirs = NULL;
1909
1910         strv_free(c->inaccessible_dirs);
1911         c->inaccessible_dirs = NULL;
1912
1913         if (c->cpuset)
1914                 CPU_FREE(c->cpuset);
1915
1916         free(c->utmp_id);
1917         c->utmp_id = NULL;
1918
1919         free(c->selinux_context);
1920         c->selinux_context = NULL;
1921
1922         free(c->apparmor_profile);
1923         c->apparmor_profile = NULL;
1924
1925         set_free(c->syscall_filter);
1926         c->syscall_filter = NULL;
1927
1928         set_free(c->syscall_archs);
1929         c->syscall_archs = NULL;
1930
1931         set_free(c->address_families);
1932         c->address_families = NULL;
1933
1934         strv_free(c->runtime_directory);
1935         c->runtime_directory = NULL;
1936 }
1937
1938 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1939         char **i;
1940
1941         assert(c);
1942
1943         if (!runtime_prefix)
1944                 return 0;
1945
1946         STRV_FOREACH(i, c->runtime_directory) {
1947                 _cleanup_free_ char *p;
1948
1949                 p = strjoin(runtime_prefix, "/", *i, NULL);
1950                 if (!p)
1951                         return -ENOMEM;
1952
1953                 /* We execute this synchronously, since we need to be
1954                  * sure this is gone when we start the service
1955                  * next. */
1956                 rm_rf_dangerous(p, false, true, false);
1957         }
1958
1959         return 0;
1960 }
1961
1962 void exec_command_done(ExecCommand *c) {
1963         assert(c);
1964
1965         free(c->path);
1966         c->path = NULL;
1967
1968         strv_free(c->argv);
1969         c->argv = NULL;
1970 }
1971
1972 void exec_command_done_array(ExecCommand *c, unsigned n) {
1973         unsigned i;
1974
1975         for (i = 0; i < n; i++)
1976                 exec_command_done(c+i);
1977 }
1978
1979 void exec_command_free_list(ExecCommand *c) {
1980         ExecCommand *i;
1981
1982         while ((i = c)) {
1983                 LIST_REMOVE(command, c, i);
1984                 exec_command_done(i);
1985                 free(i);
1986         }
1987 }
1988
1989 void exec_command_free_array(ExecCommand **c, unsigned n) {
1990         unsigned i;
1991
1992         for (i = 0; i < n; i++) {
1993                 exec_command_free_list(c[i]);
1994                 c[i] = NULL;
1995         }
1996 }
1997
1998 int exec_context_load_environment(const ExecContext *c, char ***l) {
1999         char **i, **r = NULL;
2000
2001         assert(c);
2002         assert(l);
2003
2004         STRV_FOREACH(i, c->environment_files) {
2005                 char *fn;
2006                 int k;
2007                 bool ignore = false;
2008                 char **p;
2009                 _cleanup_globfree_ glob_t pglob = {};
2010                 int count, n;
2011
2012                 fn = *i;
2013
2014                 if (fn[0] == '-') {
2015                         ignore = true;
2016                         fn ++;
2017                 }
2018
2019                 if (!path_is_absolute(fn)) {
2020                         if (ignore)
2021                                 continue;
2022
2023                         strv_free(r);
2024                         return -EINVAL;
2025                 }
2026
2027                 /* Filename supports globbing, take all matching files */
2028                 errno = 0;
2029                 if (glob(fn, 0, NULL, &pglob) != 0) {
2030                         if (ignore)
2031                                 continue;
2032
2033                         strv_free(r);
2034                         return errno ? -errno : -EINVAL;
2035                 }
2036                 count = pglob.gl_pathc;
2037                 if (count == 0) {
2038                         if (ignore)
2039                                 continue;
2040
2041                         strv_free(r);
2042                         return -EINVAL;
2043                 }
2044                 for (n = 0; n < count; n++) {
2045                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2046                         if (k < 0) {
2047                                 if (ignore)
2048                                         continue;
2049
2050                                 strv_free(r);
2051                                 return k;
2052                         }
2053                         /* Log invalid environment variables with filename */
2054                         if (p)
2055                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2056
2057                         if (r == NULL)
2058                                 r = p;
2059                         else {
2060                                 char **m;
2061
2062                                 m = strv_env_merge(2, r, p);
2063                                 strv_free(r);
2064                                 strv_free(p);
2065                                 if (!m)
2066                                         return -ENOMEM;
2067
2068                                 r = m;
2069                         }
2070                 }
2071         }
2072
2073         *l = r;
2074
2075         return 0;
2076 }
2077
/* Tells whether the given tty name (with or without a "/dev/" prefix)
 * might refer to the same device as /dev/console. Errs on the side of
 * "yes": if the console cannot be resolved, true is returned. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        if (startswith(tty, "/dev/"))
                tty += strlen("/dev/");

        /* The console trivially matches itself */
        if (streq(tty, "console"))
                return true;

        console = resolve_dev_console(&active);
        if (!console)
                /* Resolution failed, so assume it may match */
                return true;

        if (streq(console, tty))
                match = true;
        else
                /* "tty0" stands for the active VC, which may
                 * sometimes be the tty in question */
                match = streq(console, "tty0") && tty_is_vc(tty);

        free(active);

        return match;
}
2100
2101 bool exec_context_may_touch_console(ExecContext *ec) {
2102         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2103                 is_terminal_input(ec->std_input) ||
2104                 is_terminal_output(ec->std_output) ||
2105                 is_terminal_output(ec->std_error)) &&
2106                tty_may_match_dev_console(tty_path(ec));
2107 }
2108
2109 static void strv_fprintf(FILE *f, char **l) {
2110         char **g;
2111
2112         assert(f);
2113
2114         STRV_FOREACH(g, l)
2115                 fprintf(f, " %s", *g);
2116 }
2117
2118 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2119         char **e;
2120         unsigned i;
2121
2122         assert(c);
2123         assert(f);
2124
2125         prefix = strempty(prefix);
2126
2127         fprintf(f,
2128                 "%sUMask: %04o\n"
2129                 "%sWorkingDirectory: %s\n"
2130                 "%sRootDirectory: %s\n"
2131                 "%sNonBlocking: %s\n"
2132                 "%sPrivateTmp: %s\n"
2133                 "%sPrivateNetwork: %s\n"
2134                 "%sPrivateDevices: %s\n"
2135                 "%sIgnoreSIGPIPE: %s\n",
2136                 prefix, c->umask,
2137                 prefix, c->working_directory ? c->working_directory : "/",
2138                 prefix, c->root_directory ? c->root_directory : "/",
2139                 prefix, yes_no(c->non_blocking),
2140                 prefix, yes_no(c->private_tmp),
2141                 prefix, yes_no(c->private_network),
2142                 prefix, yes_no(c->private_devices),
2143                 prefix, yes_no(c->ignore_sigpipe));
2144
2145         STRV_FOREACH(e, c->environment)
2146                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2147
2148         STRV_FOREACH(e, c->environment_files)
2149                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2150
2151         if (c->tcpwrap_name)
2152                 fprintf(f,
2153                         "%sTCPWrapName: %s\n",
2154                         prefix, c->tcpwrap_name);
2155
2156         if (c->nice_set)
2157                 fprintf(f,
2158                         "%sNice: %i\n",
2159                         prefix, c->nice);
2160
2161         if (c->oom_score_adjust_set)
2162                 fprintf(f,
2163                         "%sOOMScoreAdjust: %i\n",
2164                         prefix, c->oom_score_adjust);
2165
2166         for (i = 0; i < RLIM_NLIMITS; i++)
2167                 if (c->rlimit[i])
2168                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
2169
2170         if (c->ioprio_set) {
2171                 _cleanup_free_ char *class_str = NULL;
2172
2173                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2174                 fprintf(f,
2175                         "%sIOSchedulingClass: %s\n"
2176                         "%sIOPriority: %i\n",
2177                         prefix, strna(class_str),
2178                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2179         }
2180
2181         if (c->cpu_sched_set) {
2182                 _cleanup_free_ char *policy_str = NULL;
2183
2184                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2185                 fprintf(f,
2186                         "%sCPUSchedulingPolicy: %s\n"
2187                         "%sCPUSchedulingPriority: %i\n"
2188                         "%sCPUSchedulingResetOnFork: %s\n",
2189                         prefix, strna(policy_str),
2190                         prefix, c->cpu_sched_priority,
2191                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2192         }
2193
2194         if (c->cpuset) {
2195                 fprintf(f, "%sCPUAffinity:", prefix);
2196                 for (i = 0; i < c->cpuset_ncpus; i++)
2197                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2198                                 fprintf(f, " %u", i);
2199                 fputs("\n", f);
2200         }
2201
2202         if (c->timer_slack_nsec != (nsec_t) -1)
2203                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2204
2205         fprintf(f,
2206                 "%sStandardInput: %s\n"
2207                 "%sStandardOutput: %s\n"
2208                 "%sStandardError: %s\n",
2209                 prefix, exec_input_to_string(c->std_input),
2210                 prefix, exec_output_to_string(c->std_output),
2211                 prefix, exec_output_to_string(c->std_error));
2212
2213         if (c->tty_path)
2214                 fprintf(f,
2215                         "%sTTYPath: %s\n"
2216                         "%sTTYReset: %s\n"
2217                         "%sTTYVHangup: %s\n"
2218                         "%sTTYVTDisallocate: %s\n",
2219                         prefix, c->tty_path,
2220                         prefix, yes_no(c->tty_reset),
2221                         prefix, yes_no(c->tty_vhangup),
2222                         prefix, yes_no(c->tty_vt_disallocate));
2223
2224         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2225             c->std_output == EXEC_OUTPUT_KMSG ||
2226             c->std_output == EXEC_OUTPUT_JOURNAL ||
2227             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2228             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2229             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2230             c->std_error == EXEC_OUTPUT_SYSLOG ||
2231             c->std_error == EXEC_OUTPUT_KMSG ||
2232             c->std_error == EXEC_OUTPUT_JOURNAL ||
2233             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2234             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2235             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2236
2237                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2238
2239                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2240                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2241
2242                 fprintf(f,
2243                         "%sSyslogFacility: %s\n"
2244                         "%sSyslogLevel: %s\n",
2245                         prefix, strna(fac_str),
2246                         prefix, strna(lvl_str));
2247         }
2248
2249         if (c->capabilities) {
2250                 _cleanup_cap_free_charp_ char *t;
2251
2252                 t = cap_to_text(c->capabilities, NULL);
2253                 if (t)
2254                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2255         }
2256
2257         if (c->secure_bits)
2258                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2259                         prefix,
2260                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2261                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2262                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2263                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2264                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2265                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2266
2267         if (c->capability_bounding_set_drop) {
2268                 unsigned long l;
2269                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2270
2271                 for (l = 0; l <= cap_last_cap(); l++)
2272                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2273                                 _cleanup_cap_free_charp_ char *t;
2274
2275                                 t = cap_to_name(l);
2276                                 if (t)
2277                                         fprintf(f, " %s", t);
2278                         }
2279
2280                 fputs("\n", f);
2281         }
2282
2283         if (c->user)
2284                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2285         if (c->group)
2286                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2287
2288         if (strv_length(c->supplementary_groups) > 0) {
2289                 fprintf(f, "%sSupplementaryGroups:", prefix);
2290                 strv_fprintf(f, c->supplementary_groups);
2291                 fputs("\n", f);
2292         }
2293
2294         if (c->pam_name)
2295                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2296
2297         if (strv_length(c->read_write_dirs) > 0) {
2298                 fprintf(f, "%sReadWriteDirs:", prefix);
2299                 strv_fprintf(f, c->read_write_dirs);
2300                 fputs("\n", f);
2301         }
2302
2303         if (strv_length(c->read_only_dirs) > 0) {
2304                 fprintf(f, "%sReadOnlyDirs:", prefix);
2305                 strv_fprintf(f, c->read_only_dirs);
2306                 fputs("\n", f);
2307         }
2308
2309         if (strv_length(c->inaccessible_dirs) > 0) {
2310                 fprintf(f, "%sInaccessibleDirs:", prefix);
2311                 strv_fprintf(f, c->inaccessible_dirs);
2312                 fputs("\n", f);
2313         }
2314
2315         if (c->utmp_id)
2316                 fprintf(f,
2317                         "%sUtmpIdentifier: %s\n",
2318                         prefix, c->utmp_id);
2319
2320         if (c->selinux_context)
2321                 fprintf(f,
2322                         "%sSELinuxContext: %s%s\n",
2323                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2324
2325         if (c->personality != 0xffffffffUL)
2326                 fprintf(f,
2327                         "%sPersonality: %s\n",
2328                         prefix, strna(personality_to_string(c->personality)));
2329
2330         if (c->syscall_filter) {
2331 #ifdef HAVE_SECCOMP
2332                 Iterator j;
2333                 void *id;
2334                 bool first = true;
2335 #endif
2336
2337                 fprintf(f,
2338                         "%sSystemCallFilter: ",
2339                         prefix);
2340
2341                 if (!c->syscall_whitelist)
2342                         fputc('~', f);
2343
2344 #ifdef HAVE_SECCOMP
2345                 SET_FOREACH(id, c->syscall_filter, j) {
2346                         _cleanup_free_ char *name = NULL;
2347
2348                         if (first)
2349                                 first = false;
2350                         else
2351                                 fputc(' ', f);
2352
2353                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2354                         fputs(strna(name), f);
2355                 }
2356 #endif
2357
2358                 fputc('\n', f);
2359         }
2360
2361         if (c->syscall_archs) {
2362 #ifdef HAVE_SECCOMP
2363                 Iterator j;
2364                 void *id;
2365 #endif
2366
2367                 fprintf(f,
2368                         "%sSystemCallArchitectures:",
2369                         prefix);
2370
2371 #ifdef HAVE_SECCOMP
2372                 SET_FOREACH(id, c->syscall_archs, j)
2373                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2374 #endif
2375                 fputc('\n', f);
2376         }
2377
2378         if (c->syscall_errno != 0)
2379                 fprintf(f,
2380                         "%sSystemCallErrorNumber: %s\n",
2381                         prefix, strna(errno_to_name(c->syscall_errno)));
2382
2383         if (c->apparmor_profile)
2384                 fprintf(f,
2385                         "%sAppArmorProfile: %s%s\n",
2386                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2387 }
2388
2389 void exec_status_start(ExecStatus *s, pid_t pid) {
2390         assert(s);
2391
2392         zero(*s);
2393         s->pid = pid;
2394         dual_timestamp_get(&s->start_timestamp);
2395 }
2396
2397 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2398         assert(s);
2399
2400         if (s->pid && s->pid != pid)
2401                 zero(*s);
2402
2403         s->pid = pid;
2404         dual_timestamp_get(&s->exit_timestamp);
2405
2406         s->code = code;
2407         s->status = status;
2408
2409         if (context) {
2410                 if (context->utmp_id)
2411                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2412
2413                 exec_context_tty_reset(context);
2414         }
2415 }
2416
2417 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2418         char buf[FORMAT_TIMESTAMP_MAX];
2419
2420         assert(s);
2421         assert(f);
2422
2423         if (!prefix)
2424                 prefix = "";
2425
2426         if (s->pid <= 0)
2427                 return;
2428
2429         fprintf(f,
2430                 "%sPID: "PID_FMT"\n",
2431                 prefix, s->pid);
2432
2433         if (s->start_timestamp.realtime > 0)
2434                 fprintf(f,
2435                         "%sStart Timestamp: %s\n",
2436                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2437
2438         if (s->exit_timestamp.realtime > 0)
2439                 fprintf(f,
2440                         "%sExit Timestamp: %s\n"
2441                         "%sExit Code: %s\n"
2442                         "%sExit Status: %i\n",
2443                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2444                         prefix, sigchld_code_to_string(s->code),
2445                         prefix, s->status);
2446 }
2447
2448 char *exec_command_line(char **argv) {
2449         size_t k;
2450         char *n, *p, **a;
2451         bool first = true;
2452
2453         assert(argv);
2454
2455         k = 1;
2456         STRV_FOREACH(a, argv)
2457                 k += strlen(*a)+3;
2458
2459         if (!(n = new(char, k)))
2460                 return NULL;
2461
2462         p = n;
2463         STRV_FOREACH(a, argv) {
2464
2465                 if (!first)
2466                         *(p++) = ' ';
2467                 else
2468                         first = false;
2469
2470                 if (strpbrk(*a, WHITESPACE)) {
2471                         *(p++) = '\'';
2472                         p = stpcpy(p, *a);
2473                         *(p++) = '\'';
2474                 } else
2475                         p = stpcpy(p, *a);
2476
2477         }
2478
2479         *p = 0;
2480
2481         /* FIXME: this doesn't really handle arguments that have
2482          * spaces and ticks in them */
2483
2484         return n;
2485 }
2486
2487 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2488         char *p2;
2489         const char *prefix2;
2490
2491         char *cmd;
2492
2493         assert(c);
2494         assert(f);
2495
2496         if (!prefix)
2497                 prefix = "";
2498         p2 = strappend(prefix, "\t");
2499         prefix2 = p2 ? p2 : prefix;
2500
2501         cmd = exec_command_line(c->argv);
2502
2503         fprintf(f,
2504                 "%sCommand Line: %s\n",
2505                 prefix, cmd ? cmd : strerror(ENOMEM));
2506
2507         free(cmd);
2508
2509         exec_status_dump(&c->exec_status, f, prefix2);
2510
2511         free(p2);
2512 }
2513
2514 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2515         assert(f);
2516
2517         if (!prefix)
2518                 prefix = "";
2519
2520         LIST_FOREACH(command, c, c)
2521                 exec_command_dump(c, f, prefix);
2522 }
2523
2524 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2525         ExecCommand *end;
2526
2527         assert(l);
2528         assert(e);
2529
2530         if (*l) {
2531                 /* It's kind of important, that we keep the order here */
2532                 LIST_FIND_TAIL(command, *l, end);
2533                 LIST_INSERT_AFTER(command, *l, end, e);
2534         } else
2535               *l = e;
2536 }
2537
2538 int exec_command_set(ExecCommand *c, const char *path, ...) {
2539         va_list ap;
2540         char **l, *p;
2541
2542         assert(c);
2543         assert(path);
2544
2545         va_start(ap, path);
2546         l = strv_new_ap(path, ap);
2547         va_end(ap);
2548
2549         if (!l)
2550                 return -ENOMEM;
2551
2552         p = strdup(path);
2553         if (!p) {
2554                 strv_free(l);
2555                 return -ENOMEM;
2556         }
2557
2558         free(c->path);
2559         c->path = p;
2560
2561         strv_free(c->argv);
2562         c->argv = l;
2563
2564         return 0;
2565 }
2566
2567 static int exec_runtime_allocate(ExecRuntime **rt) {
2568
2569         if (*rt)
2570                 return 0;
2571
2572         *rt = new0(ExecRuntime, 1);
2573         if (!*rt)
2574                 return -ENOMEM;
2575
2576         (*rt)->n_ref = 1;
2577         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2578
2579         return 0;
2580 }
2581
2582 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2583         int r;
2584
2585         assert(rt);
2586         assert(c);
2587         assert(id);
2588
2589         if (*rt)
2590                 return 1;
2591
2592         if (!c->private_network && !c->private_tmp)
2593                 return 0;
2594
2595         r = exec_runtime_allocate(rt);
2596         if (r < 0)
2597                 return r;
2598
2599         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2600                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2601                         return -errno;
2602         }
2603
2604         if (c->private_tmp && !(*rt)->tmp_dir) {
2605                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2606                 if (r < 0)
2607                         return r;
2608         }
2609
2610         return 1;
2611 }
2612
2613 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2614         assert(r);
2615         assert(r->n_ref > 0);
2616
2617         r->n_ref++;
2618         return r;
2619 }
2620
2621 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2622
2623         if (!r)
2624                 return NULL;
2625
2626         assert(r->n_ref > 0);
2627
2628         r->n_ref--;
2629         if (r->n_ref <= 0) {
2630                 free(r->tmp_dir);
2631                 free(r->var_tmp_dir);
2632                 close_pipe(r->netns_storage_socket);
2633                 free(r);
2634         }
2635
2636         return NULL;
2637 }
2638
2639 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2640         assert(u);
2641         assert(f);
2642         assert(fds);
2643
2644         if (!rt)
2645                 return 0;
2646
2647         if (rt->tmp_dir)
2648                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2649
2650         if (rt->var_tmp_dir)
2651                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2652
2653         if (rt->netns_storage_socket[0] >= 0) {
2654                 int copy;
2655
2656                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2657                 if (copy < 0)
2658                         return copy;
2659
2660                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2661         }
2662
2663         if (rt->netns_storage_socket[1] >= 0) {
2664                 int copy;
2665
2666                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2667                 if (copy < 0)
2668                         return copy;
2669
2670                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2671         }
2672
2673         return 0;
2674 }
2675
2676 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2677         int r;
2678
2679         assert(rt);
2680         assert(key);
2681         assert(value);
2682
2683         if (streq(key, "tmp-dir")) {
2684                 char *copy;
2685
2686                 r = exec_runtime_allocate(rt);
2687                 if (r < 0)
2688                         return r;
2689
2690                 copy = strdup(value);
2691                 if (!copy)
2692                         return log_oom();
2693
2694                 free((*rt)->tmp_dir);
2695                 (*rt)->tmp_dir = copy;
2696
2697         } else if (streq(key, "var-tmp-dir")) {
2698                 char *copy;
2699
2700                 r = exec_runtime_allocate(rt);
2701                 if (r < 0)
2702                         return r;
2703
2704                 copy = strdup(value);
2705                 if (!copy)
2706                         return log_oom();
2707
2708                 free((*rt)->var_tmp_dir);
2709                 (*rt)->var_tmp_dir = copy;
2710
2711         } else if (streq(key, "netns-socket-0")) {
2712                 int fd;
2713
2714                 r = exec_runtime_allocate(rt);
2715                 if (r < 0)
2716                         return r;
2717
2718                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2719                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2720                 else {
2721                         safe_close((*rt)->netns_storage_socket[0]);
2722                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2723                 }
2724         } else if (streq(key, "netns-socket-1")) {
2725                 int fd;
2726
2727                 r = exec_runtime_allocate(rt);
2728                 if (r < 0)
2729                         return r;
2730
2731                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2732                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2733                 else {
2734                         safe_close((*rt)->netns_storage_socket[1]);
2735                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2736                 }
2737         } else
2738                 return 0;
2739
2740         return 1;
2741 }
2742
/* Thread entry point: recursively removes the directory whose path is passed
 * in 'p' via rm_rf_dangerous(). Takes ownership of the string and frees it
 * once the removal attempt is done. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2749
2750 void exec_runtime_destroy(ExecRuntime *rt) {
2751         int r;
2752
2753         if (!rt)
2754                 return;
2755
2756         /* If there are multiple users of this, let's leave the stuff around */
2757         if (rt->n_ref > 1)
2758                 return;
2759
2760         if (rt->tmp_dir) {
2761                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2762
2763                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2764                 if (r < 0) {
2765                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2766                         free(rt->tmp_dir);
2767                 }
2768
2769                 rt->tmp_dir = NULL;
2770         }
2771
2772         if (rt->var_tmp_dir) {
2773                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2774
2775                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2776                 if (r < 0) {
2777                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2778                         free(rt->var_tmp_dir);
2779                 }
2780
2781                 rt->var_tmp_dir = NULL;
2782         }
2783
2784         close_pipe(rt->netns_storage_socket);
2785 }
2786
2787 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2788         [EXEC_INPUT_NULL] = "null",
2789         [EXEC_INPUT_TTY] = "tty",
2790         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2791         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2792         [EXEC_INPUT_SOCKET] = "socket"
2793 };
2794
2795 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2796
2797 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2798         [EXEC_OUTPUT_INHERIT] = "inherit",
2799         [EXEC_OUTPUT_NULL] = "null",
2800         [EXEC_OUTPUT_TTY] = "tty",
2801         [EXEC_OUTPUT_SYSLOG] = "syslog",
2802         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2803         [EXEC_OUTPUT_KMSG] = "kmsg",
2804         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2805         [EXEC_OUTPUT_JOURNAL] = "journal",
2806         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2807         [EXEC_OUTPUT_SOCKET] = "socket"
2808 };
2809
2810 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);