chiark / gitweb /
ca807dc8cb2fa86ff9b2afbba8dc9ecc70d88fe8
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "tcpwrap.h"
73 #include "exit-status.h"
74 #include "missing.h"
75 #include "utmp-wtmp.h"
76 #include "def.h"
77 #include "path-util.h"
78 #include "env-util.h"
79 #include "fileio.h"
80 #include "unit.h"
81 #include "async.h"
82 #include "selinux-util.h"
83 #include "errno-list.h"
84 #include "af-list.h"
85 #include "mkdir.h"
86 #include "apparmor-util.h"
87
88 #ifdef HAVE_SECCOMP
89 #include "seccomp-util.h"
90 #endif
91
92 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
93 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94
95 /* This assumes there is a 'tty' group */
96 #define TTY_MODE 0620
97
98 #define SNDBUF_SIZE (8*1024*1024)
99
/* Moves the n_fds descriptors listed in fds[] so that fds[k] becomes
 * descriptor number k+3. The array is rewritten in place with the new
 * descriptor numbers. Returns 0 on success, or a negative errno if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, dup_fd;

                        /* This one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The wanted slot was still taken, so we got a
                         * higher number. Remember the first such index
                         * and run another pass starting from there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                /* -1 means every entry landed where it belongs */
                first = retry_at;
        }

        return 0;
}
143
/* Sets or clears O_NONBLOCK on each of the n_fds descriptors according
 * to 'nonblock', and always clears FD_CLOEXEC on them. Returns 0 on
 * success or the first negative error reported by a helper. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* These descriptors are meant to be handed to our
                 * children, hence close-on-exec is dropped
                 * unconditionally. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
170
171 _pure_ static const char *tty_path(const ExecContext *context) {
172         assert(context);
173
174         if (context->tty_path)
175                 return context->tty_path;
176
177         return "/dev/console";
178 }
179
180 static void exec_context_tty_reset(const ExecContext *context) {
181         assert(context);
182
183         if (context->tty_vhangup)
184                 terminal_vhangup(tty_path(context));
185
186         if (context->tty_reset)
187                 reset_terminal(tty_path(context));
188
189         if (context->tty_vt_disallocate && context->tty_path)
190                 vt_disallocate(context->tty_path);
191 }
192
193 static bool is_terminal_output(ExecOutput o) {
194         return
195                 o == EXEC_OUTPUT_TTY ||
196                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
198                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
199 }
200
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success or a negative
 * errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already got the number we were after */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        close_nointr_nofail(null_fd);

        return ret;
}
218
219 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220         int fd, r;
221         union sockaddr_union sa = {
222                 .un.sun_family = AF_UNIX,
223                 .un.sun_path = "/run/systemd/journal/stdout",
224         };
225
226         assert(context);
227         assert(output < _EXEC_OUTPUT_MAX);
228         assert(ident);
229         assert(nfd >= 0);
230
231         fd = socket(AF_UNIX, SOCK_STREAM, 0);
232         if (fd < 0)
233                 return -errno;
234
235         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
236         if (r < 0) {
237                 close_nointr_nofail(fd);
238                 return -errno;
239         }
240
241         if (shutdown(fd, SHUT_RD) < 0) {
242                 close_nointr_nofail(fd);
243                 return -errno;
244         }
245
246         fd_inc_sndbuf(fd, SNDBUF_SIZE);
247
248         dprintf(fd,
249                 "%s\n"
250                 "%s\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n",
256                 context->syslog_identifier ? context->syslog_identifier : ident,
257                 unit_id,
258                 context->syslog_priority,
259                 !!context->syslog_level_prefix,
260                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
261                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
262                 is_terminal_output(output));
263
264         if (fd != nfd) {
265                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
266                 close_nointr_nofail(fd);
267         } else
268                 r = nfd;
269
270         return r;
271 }
/* Opens the terminal at 'path' with the given open flags (plus
 * O_NOCTTY) and makes it available as descriptor nfd. Returns nfd on
 * success or a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already got the number we were after */
        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        close_nointr_nofail(term_fd);

        return ret;
}
289
290 static bool is_terminal_input(ExecInput i) {
291         return
292                 i == EXEC_INPUT_TTY ||
293                 i == EXEC_INPUT_TTY_FORCE ||
294                 i == EXEC_INPUT_TTY_FAIL;
295 }
296
297 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298
299         if (is_terminal_input(std_input) && !apply_tty_stdin)
300                 return EXEC_INPUT_NULL;
301
302         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
303                 return EXEC_INPUT_NULL;
304
305         return std_input;
306 }
307
308 static int fixup_output(ExecOutput std_output, int socket_fd) {
309
310         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
311                 return EXEC_OUTPUT_INHERIT;
312
313         return std_output;
314 }
315
316 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
317         ExecInput i;
318
319         assert(context);
320
321         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322
323         switch (i) {
324
325         case EXEC_INPUT_NULL:
326                 return open_null_as(O_RDONLY, STDIN_FILENO);
327
328         case EXEC_INPUT_TTY:
329         case EXEC_INPUT_TTY_FORCE:
330         case EXEC_INPUT_TTY_FAIL: {
331                 int fd, r;
332
333                 fd = acquire_terminal(tty_path(context),
334                                       i == EXEC_INPUT_TTY_FAIL,
335                                       i == EXEC_INPUT_TTY_FORCE,
336                                       false,
337                                       (usec_t) -1);
338                 if (fd < 0)
339                         return fd;
340
341                 if (fd != STDIN_FILENO) {
342                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
343                         close_nointr_nofail(fd);
344                 } else
345                         r = STDIN_FILENO;
346
347                 return r;
348         }
349
350         case EXEC_INPUT_SOCKET:
351                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352
353         default:
354                 assert_not_reached("Unknown input type");
355         }
356 }
357
358 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
359         ExecOutput o;
360         ExecInput i;
361         int r;
362
363         assert(context);
364         assert(ident);
365
366         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367         o = fixup_output(context->std_output, socket_fd);
368
369         if (fileno == STDERR_FILENO) {
370                 ExecOutput e;
371                 e = fixup_output(context->std_error, socket_fd);
372
373                 /* This expects the input and output are already set up */
374
375                 /* Don't change the stderr file descriptor if we inherit all
376                  * the way and are not on a tty */
377                 if (e == EXEC_OUTPUT_INHERIT &&
378                     o == EXEC_OUTPUT_INHERIT &&
379                     i == EXEC_INPUT_NULL &&
380                     !is_terminal_input(context->std_input) &&
381                     getppid () != 1)
382                         return fileno;
383
384                 /* Duplicate from stdout if possible */
385                 if (e == o || e == EXEC_OUTPUT_INHERIT)
386                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
387
388                 o = e;
389
390         } else if (o == EXEC_OUTPUT_INHERIT) {
391                 /* If input got downgraded, inherit the original value */
392                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
393                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394
395                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
396                 if (i != EXEC_INPUT_NULL)
397                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398
399                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
400                 if (getppid() != 1)
401                         return fileno;
402
403                 /* We need to open /dev/null here anew, to get the right access mode. */
404                 return open_null_as(O_WRONLY, fileno);
405         }
406
407         switch (o) {
408
409         case EXEC_OUTPUT_NULL:
410                 return open_null_as(O_WRONLY, fileno);
411
412         case EXEC_OUTPUT_TTY:
413                 if (is_terminal_input(i))
414                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415
416                 /* We don't reset the terminal if this is just about output */
417                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418
419         case EXEC_OUTPUT_SYSLOG:
420         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421         case EXEC_OUTPUT_KMSG:
422         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423         case EXEC_OUTPUT_JOURNAL:
424         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425                 r = connect_logger_as(context, o, ident, unit_id, fileno);
426                 if (r < 0) {
427                         log_struct_unit(LOG_CRIT, unit_id,
428                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
429                                 fileno == STDOUT_FILENO ? "out" : "err",
430                                 unit_id, strerror(-r),
431                                 "ERRNO=%d", -r,
432                                 NULL);
433                         r = open_null_as(O_WRONLY, fileno);
434                 }
435                 return r;
436
437         case EXEC_OUTPUT_SOCKET:
438                 assert(socket_fd >= 0);
439                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
440
441         default:
442                 assert_not_reached("Unknown error type");
443         }
444 }
445
446 static int chown_terminal(int fd, uid_t uid) {
447         struct stat st;
448
449         assert(fd >= 0);
450
451         /* This might fail. What matters are the results. */
452         (void) fchown(fd, uid, -1);
453         (void) fchmod(fd, TTY_MODE);
454
455         if (fstat(fd, &st) < 0)
456                 return -errno;
457
458         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
459                 return -EPERM;
460
461         return 0;
462 }
463
464 static int setup_confirm_stdio(int *_saved_stdin,
465                                int *_saved_stdout) {
466         int fd = -1, saved_stdin, saved_stdout = -1, r;
467
468         assert(_saved_stdin);
469         assert(_saved_stdout);
470
471         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
472         if (saved_stdin < 0)
473                 return -errno;
474
475         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
476         if (saved_stdout < 0) {
477                 r = errno;
478                 goto fail;
479         }
480
481         fd = acquire_terminal(
482                         "/dev/console",
483                         false,
484                         false,
485                         false,
486                         DEFAULT_CONFIRM_USEC);
487         if (fd < 0) {
488                 r = fd;
489                 goto fail;
490         }
491
492         r = chown_terminal(fd, getuid());
493         if (r < 0)
494                 goto fail;
495
496         if (dup2(fd, STDIN_FILENO) < 0) {
497                 r = -errno;
498                 goto fail;
499         }
500
501         if (dup2(fd, STDOUT_FILENO) < 0) {
502                 r = -errno;
503                 goto fail;
504         }
505
506         if (fd >= 2)
507                 close_nointr_nofail(fd);
508
509         *_saved_stdin = saved_stdin;
510         *_saved_stdout = saved_stdout;
511
512         return 0;
513
514 fail:
515         if (saved_stdout >= 0)
516                 close_nointr_nofail(saved_stdout);
517
518         if (saved_stdin >= 0)
519                 close_nointr_nofail(saved_stdin);
520
521         if (fd >= 0)
522                 close_nointr_nofail(fd);
523
524         return r;
525 }
526
527 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
528         int fd;
529         va_list ap;
530
531         assert(format);
532
533         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
534         if (fd < 0)
535                 return fd;
536
537         va_start(ap, format);
538         vdprintf(fd, format, ap);
539         va_end(ap);
540
541         close_nointr_nofail(fd);
542
543         return 0;
544 }
545
/* Releases the terminal and puts the saved descriptors back in place
 * as stdin/stdout, closing the saved copies afterwards. Negative
 * values in *saved_stdin / *saved_stdout are skipped. Returns 0, or
 * the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;
        bool have_stdin, have_stdout;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* First put both back, then drop the copies */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return ret;
}
572
/* Asks on the console whether the command line in argv shall be
 * executed. The answer character (one of "yns") is stored in
 * *response by ask().
 *
 * stdin/stdout are switched to the console for the duration of the
 * prompt and are restored on every path after the switch succeeded.
 *
 * Returns the result of ask(), -ENOMEM if the command line could not
 * be formatted, or the negative error from setting up the console. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't bail out with stdio still redirected and the
                 * saved descriptors still open */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
592
593 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
594         bool keep_groups = false;
595         int r;
596
597         assert(context);
598
599         /* Lookup and set GID and supplementary group list. Here too
600          * we avoid NSS lookups for gid=0. */
601
602         if (context->group || username) {
603
604                 if (context->group) {
605                         const char *g = context->group;
606
607                         if ((r = get_group_creds(&g, &gid)) < 0)
608                                 return r;
609                 }
610
611                 /* First step, initialize groups from /etc/groups */
612                 if (username && gid != 0) {
613                         if (initgroups(username, gid) < 0)
614                                 return -errno;
615
616                         keep_groups = true;
617                 }
618
619                 /* Second step, set our gids */
620                 if (setresgid(gid, gid, gid) < 0)
621                         return -errno;
622         }
623
624         if (context->supplementary_groups) {
625                 int ngroups_max, k;
626                 gid_t *gids;
627                 char **i;
628
629                 /* Final step, initialize any manually set supplementary groups */
630                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
631
632                 if (!(gids = new(gid_t, ngroups_max)))
633                         return -ENOMEM;
634
635                 if (keep_groups) {
636                         if ((k = getgroups(ngroups_max, gids)) < 0) {
637                                 free(gids);
638                                 return -errno;
639                         }
640                 } else
641                         k = 0;
642
643                 STRV_FOREACH(i, context->supplementary_groups) {
644                         const char *g;
645
646                         if (k >= ngroups_max) {
647                                 free(gids);
648                                 return -E2BIG;
649                         }
650
651                         g = *i;
652                         r = get_group_creds(&g, gids+k);
653                         if (r < 0) {
654                                 free(gids);
655                                 return r;
656                         }
657
658                         k++;
659                 }
660
661                 if (setgroups(k, gids) < 0) {
662                         free(gids);
663                         return -errno;
664                 }
665
666                 free(gids);
667         }
668
669         return 0;
670 }
671
672 static int enforce_user(const ExecContext *context, uid_t uid) {
673         assert(context);
674
675         /* Sets (but doesn't lookup) the uid and make sure we keep the
676          * capabilities while doing so. */
677
678         if (context->capabilities) {
679                 _cleanup_cap_free_ cap_t d = NULL;
680                 static const cap_value_t bits[] = {
681                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
682                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
683                 };
684
685                 /* First step: If we need to keep capabilities but
686                  * drop privileges we need to make sure we keep our
687                  * caps, while we drop privileges. */
688                 if (uid != 0) {
689                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
690
691                         if (prctl(PR_GET_SECUREBITS) != sb)
692                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
693                                         return -errno;
694                 }
695
696                 /* Second step: set the capabilities. This will reduce
697                  * the capabilities to the minimum we need. */
698
699                 d = cap_dup(context->capabilities);
700                 if (!d)
701                         return -errno;
702
703                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
704                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
705                         return -errno;
706
707                 if (cap_set_proc(d) < 0)
708                         return -errno;
709         }
710
711         /* Third step: actually set the uids */
712         if (setresuid(uid, uid, uid) < 0)
713                 return -errno;
714
715         /* At this point we should have all necessary capabilities but
716            are otherwise a normal user. However, the caps might got
717            corrupted due to the setresuid() so we need clean them up
718            later. This is done outside of this call. */
719
720         return 0;
721 }
722
723 #ifdef HAVE_PAM
724
725 static int null_conv(
726                 int num_msg,
727                 const struct pam_message **msg,
728                 struct pam_response **resp,
729                 void *appdata_ptr) {
730
731         /* We don't support conversations */
732
733         return PAM_CONV_ERR;
734 }
735
736 static int setup_pam(
737                 const char *name,
738                 const char *user,
739                 uid_t uid,
740                 const char *tty,
741                 char ***pam_env,
742                 int fds[], unsigned n_fds) {
743
744         static const struct pam_conv conv = {
745                 .conv = null_conv,
746                 .appdata_ptr = NULL
747         };
748
749         pam_handle_t *handle = NULL;
750         sigset_t ss, old_ss;
751         int pam_code = PAM_SUCCESS;
752         int err;
753         char **e = NULL;
754         bool close_session = false;
755         pid_t pam_pid = 0, parent_pid;
756         int flags = 0;
757
758         assert(name);
759         assert(user);
760         assert(pam_env);
761
762         /* We set up PAM in the parent process, then fork. The child
763          * will then stay around until killed via PR_GET_PDEATHSIG or
764          * systemd via the cgroup logic. It will then remove the PAM
765          * session again. The parent process will exec() the actual
766          * daemon. We do things this way to ensure that the main PID
767          * of the daemon is the one we initially fork()ed. */
768
769         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
770                 flags |= PAM_SILENT;
771
772         pam_code = pam_start(name, user, &conv, &handle);
773         if (pam_code != PAM_SUCCESS) {
774                 handle = NULL;
775                 goto fail;
776         }
777
778         if (tty) {
779                 pam_code = pam_set_item(handle, PAM_TTY, tty);
780                 if (pam_code != PAM_SUCCESS)
781                         goto fail;
782         }
783
784         pam_code = pam_acct_mgmt(handle, flags);
785         if (pam_code != PAM_SUCCESS)
786                 goto fail;
787
788         pam_code = pam_open_session(handle, flags);
789         if (pam_code != PAM_SUCCESS)
790                 goto fail;
791
792         close_session = true;
793
794         e = pam_getenvlist(handle);
795         if (!e) {
796                 pam_code = PAM_BUF_ERR;
797                 goto fail;
798         }
799
800         /* Block SIGTERM, so that we know that it won't get lost in
801          * the child */
802         if (sigemptyset(&ss) < 0 ||
803             sigaddset(&ss, SIGTERM) < 0 ||
804             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
805                 goto fail;
806
807         parent_pid = getpid();
808
809         pam_pid = fork();
810         if (pam_pid < 0)
811                 goto fail;
812
813         if (pam_pid == 0) {
814                 int sig;
815                 int r = EXIT_PAM;
816
817                 /* The child's job is to reset the PAM session on
818                  * termination */
819
820                 /* This string must fit in 10 chars (i.e. the length
821                  * of "/sbin/init"), to look pretty in /bin/ps */
822                 rename_process("(sd-pam)");
823
824                 /* Make sure we don't keep open the passed fds in this
825                 child. We assume that otherwise only those fds are
826                 open here that have been opened by PAM. */
827                 close_many(fds, n_fds);
828
829                 /* Drop privileges - we don't need any to pam_close_session
830                  * and this will make PR_SET_PDEATHSIG work in most cases.
831                  * If this fails, ignore the error - but expect sd-pam threads
832                  * to fail to exit normally */
833                 if (setresuid(uid, uid, uid) < 0)
834                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
835
836                 /* Wait until our parent died. This will only work if
837                  * the above setresuid() succeeds, otherwise the kernel
838                  * will not allow unprivileged parents kill their privileged
839                  * children this way. We rely on the control groups kill logic
840                  * to do the rest for us. */
841                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
842                         goto child_finish;
843
844                 /* Check if our parent process might already have
845                  * died? */
846                 if (getppid() == parent_pid) {
847                         for (;;) {
848                                 if (sigwait(&ss, &sig) < 0) {
849                                         if (errno == EINTR)
850                                                 continue;
851
852                                         goto child_finish;
853                                 }
854
855                                 assert(sig == SIGTERM);
856                                 break;
857                         }
858                 }
859
860                 /* If our parent died we'll end the session */
861                 if (getppid() != parent_pid) {
862                         pam_code = pam_close_session(handle, flags);
863                         if (pam_code != PAM_SUCCESS)
864                                 goto child_finish;
865                 }
866
867                 r = 0;
868
869         child_finish:
870                 pam_end(handle, pam_code | flags);
871                 _exit(r);
872         }
873
874         /* If the child was forked off successfully it will do all the
875          * cleanups, so forget about the handle here. */
876         handle = NULL;
877
878         /* Unblock SIGTERM again in the parent */
879         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
880                 goto fail;
881
882         /* We close the log explicitly here, since the PAM modules
883          * might have opened it, but we don't want this fd around. */
884         closelog();
885
886         *pam_env = e;
887         e = NULL;
888
889         return 0;
890
891 fail:
892         if (pam_code != PAM_SUCCESS) {
893                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
894                 err = -EPERM;  /* PAM errors do not map to errno */
895         } else {
896                 log_error("PAM failed: %m");
897                 err = -errno;
898         }
899
900         if (handle) {
901                 if (close_session)
902                         pam_code = pam_close_session(handle, flags);
903
904                 pam_end(handle, pam_code | flags);
905         }
906
907         strv_free(e);
908
909         closelog();
910
911         if (pam_pid > 1) {
912                 kill(pam_pid, SIGTERM);
913                 kill(pam_pid, SIGCONT);
914         }
915
916         return err;
917 }
918 #endif
919
/* Renames the process to "(NAME)", where NAME is made of the last (up
 * to) eight characters of the final component of 'path'. If that
 * component is empty the process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        /* The result has to fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: the beginning is usually the less
                 * interesting part, as it might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
950
951 #ifdef HAVE_SECCOMP
952
953 static int apply_seccomp(ExecContext *c) {
954         uint32_t negative_action, action;
955         scmp_filter_ctx *seccomp;
956         Iterator i;
957         void *id;
958         int r;
959
960         assert(c);
961
962         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
963
964         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
965         if (!seccomp)
966                 return -ENOMEM;
967
968         if (c->syscall_archs) {
969
970                 SET_FOREACH(id, c->syscall_archs, i) {
971                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
972                         if (r == -EEXIST)
973                                 continue;
974                         if (r < 0)
975                                 goto finish;
976                 }
977
978         } else {
979                 r = seccomp_add_secondary_archs(seccomp);
980                 if (r < 0)
981                         goto finish;
982         }
983
984         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
985         SET_FOREACH(id, c->syscall_filter, i) {
986                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
987                 if (r < 0)
988                         goto finish;
989         }
990
991         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
992         if (r < 0)
993                 goto finish;
994
995         r = seccomp_load(seccomp);
996
997 finish:
998         seccomp_release(seccomp);
999         return r;
1000 }
1001
1002 static int apply_address_families(ExecContext *c) {
1003         scmp_filter_ctx *seccomp;
1004         Iterator i;
1005         int r;
1006
1007         assert(c);
1008
1009         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1010         if (!seccomp)
1011                 return -ENOMEM;
1012
1013         r = seccomp_add_secondary_archs(seccomp);
1014         if (r < 0)
1015                 goto finish;
1016
1017         if (c->address_families_whitelist) {
1018                 int af, first = 0, last = 0;
1019                 void *afp;
1020
1021                 /* If this is a whitelist, we first block the address
1022                  * families that are out of range and then everything
1023                  * that is not in the set. First, we find the lowest
1024                  * and highest address family in the set. */
1025
1026                 SET_FOREACH(afp, c->address_families, i) {
1027                         af = PTR_TO_INT(afp);
1028
1029                         if (af <= 0 || af >= af_max())
1030                                 continue;
1031
1032                         if (first == 0 || af < first)
1033                                 first = af;
1034
1035                         if (last == 0 || af > last)
1036                                 last = af;
1037                 }
1038
1039                 assert((first == 0) == (last == 0));
1040
1041                 if (first == 0) {
1042
1043                         /* No entries in the valid range, block everything */
1044                         r = seccomp_rule_add(
1045                                         seccomp,
1046                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1047                                         SCMP_SYS(socket),
1048                                         0);
1049                         if (r < 0)
1050                                 goto finish;
1051
1052                 } else {
1053
1054                         /* Block everything below the first entry */
1055                         r = seccomp_rule_add(
1056                                         seccomp,
1057                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1058                                         SCMP_SYS(socket),
1059                                         1,
1060                                         SCMP_A0(SCMP_CMP_LT, first));
1061                         if (r < 0)
1062                                 goto finish;
1063
1064                         /* Block everything above the last entry */
1065                         r = seccomp_rule_add(
1066                                         seccomp,
1067                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1068                                         SCMP_SYS(socket),
1069                                         1,
1070                                         SCMP_A0(SCMP_CMP_GT, last));
1071                         if (r < 0)
1072                                 goto finish;
1073
1074                         /* Block everything between the first and last
1075                          * entry */
1076                         for (af = 1; af < af_max(); af++) {
1077
1078                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1079                                         continue;
1080
1081                                 r = seccomp_rule_add(
1082                                                 seccomp,
1083                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1084                                                 SCMP_SYS(socket),
1085                                                 1,
1086                                                 SCMP_A0(SCMP_CMP_EQ, af));
1087                                 if (r < 0)
1088                                         goto finish;
1089                         }
1090                 }
1091
1092         } else {
1093                 void *af;
1094
1095                 /* If this is a blacklist, then generate one rule for
1096                  * each address family that are then combined in OR
1097                  * checks. */
1098
1099                 SET_FOREACH(af, c->address_families, i) {
1100
1101                         r = seccomp_rule_add(
1102                                         seccomp,
1103                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1104                                         SCMP_SYS(socket),
1105                                         1,
1106                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1107                         if (r < 0)
1108                                 goto finish;
1109                 }
1110         }
1111
1112         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1113         if (r < 0)
1114                 goto finish;
1115
1116         r = seccomp_load(seccomp);
1117
1118 finish:
1119         seccomp_release(seccomp);
1120         return r;
1121 }
1122
1123 #endif
1124
1125 static void do_idle_pipe_dance(int idle_pipe[4]) {
1126         assert(idle_pipe);
1127
1128         if (idle_pipe[1] >= 0)
1129                 close_nointr_nofail(idle_pipe[1]);
1130         if (idle_pipe[2] >= 0)
1131                 close_nointr_nofail(idle_pipe[2]);
1132
1133         if (idle_pipe[0] >= 0) {
1134                 int r;
1135
1136                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1137
1138                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1139                         /* Signal systemd that we are bored and want to continue. */
1140                         write(idle_pipe[3], "x", 1);
1141
1142                         /* Wait for systemd to react to the signal above. */
1143                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1144                 }
1145
1146                 close_nointr_nofail(idle_pipe[0]);
1147
1148         }
1149
1150         if (idle_pipe[3] >= 0)
1151                 close_nointr_nofail(idle_pipe[3]);
1152 }
1153
1154 static int build_environment(
1155                 ExecContext *c,
1156                 unsigned n_fds,
1157                 usec_t watchdog_usec,
1158                 const char *home,
1159                 const char *username,
1160                 const char *shell,
1161                 char ***ret) {
1162
1163         _cleanup_strv_free_ char **our_env = NULL;
1164         unsigned n_env = 0;
1165         char *x;
1166
1167         assert(c);
1168         assert(ret);
1169
1170         our_env = new0(char*, 10);
1171         if (!our_env)
1172                 return -ENOMEM;
1173
1174         if (n_fds > 0) {
1175                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178
1179                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1180                         return -ENOMEM;
1181                 our_env[n_env++] = x;
1182         }
1183
1184         if (watchdog_usec > 0) {
1185                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1186                         return -ENOMEM;
1187                 our_env[n_env++] = x;
1188
1189                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1190                         return -ENOMEM;
1191                 our_env[n_env++] = x;
1192         }
1193
1194         if (home) {
1195                 x = strappend("HOME=", home);
1196                 if (!x)
1197                         return -ENOMEM;
1198                 our_env[n_env++] = x;
1199         }
1200
1201         if (username) {
1202                 x = strappend("LOGNAME=", username);
1203                 if (!x)
1204                         return -ENOMEM;
1205                 our_env[n_env++] = x;
1206
1207                 x = strappend("USER=", username);
1208                 if (!x)
1209                         return -ENOMEM;
1210                 our_env[n_env++] = x;
1211         }
1212
1213         if (shell) {
1214                 x = strappend("SHELL=", shell);
1215                 if (!x)
1216                         return -ENOMEM;
1217                 our_env[n_env++] = x;
1218         }
1219
1220         if (is_terminal_input(c->std_input) ||
1221             c->std_output == EXEC_OUTPUT_TTY ||
1222             c->std_error == EXEC_OUTPUT_TTY ||
1223             c->tty_path) {
1224
1225                 x = strdup(default_term_for_tty(tty_path(c)));
1226                 if (!x)
1227                         return -ENOMEM;
1228                 our_env[n_env++] = x;
1229         }
1230
1231         our_env[n_env++] = NULL;
1232         assert(n_env <= 10);
1233
1234         *ret = our_env;
1235         our_env = NULL;
1236
1237         return 0;
1238 }
1239
1240 int exec_spawn(ExecCommand *command,
1241                char **argv,
1242                ExecContext *context,
1243                int fds[], unsigned n_fds,
1244                char **environment,
1245                bool apply_permissions,
1246                bool apply_chroot,
1247                bool apply_tty_stdin,
1248                bool confirm_spawn,
1249                CGroupControllerMask cgroup_supported,
1250                const char *cgroup_path,
1251                const char *runtime_prefix,
1252                const char *unit_id,
1253                usec_t watchdog_usec,
1254                int idle_pipe[4],
1255                ExecRuntime *runtime,
1256                pid_t *ret) {
1257
1258         _cleanup_strv_free_ char **files_env = NULL;
1259         int socket_fd;
1260         char *line;
1261         pid_t pid;
1262         int r;
1263
1264         assert(command);
1265         assert(context);
1266         assert(ret);
1267         assert(fds || n_fds <= 0);
1268
1269         if (context->std_input == EXEC_INPUT_SOCKET ||
1270             context->std_output == EXEC_OUTPUT_SOCKET ||
1271             context->std_error == EXEC_OUTPUT_SOCKET) {
1272
1273                 if (n_fds != 1)
1274                         return -EINVAL;
1275
1276                 socket_fd = fds[0];
1277
1278                 fds = NULL;
1279                 n_fds = 0;
1280         } else
1281                 socket_fd = -1;
1282
1283         r = exec_context_load_environment(context, &files_env);
1284         if (r < 0) {
1285                 log_struct_unit(LOG_ERR,
1286                            unit_id,
1287                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1288                            "ERRNO=%d", -r,
1289                            NULL);
1290                 return r;
1291         }
1292
1293         if (!argv)
1294                 argv = command->argv;
1295
1296         line = exec_command_line(argv);
1297         if (!line)
1298                 return log_oom();
1299
1300         log_struct_unit(LOG_DEBUG,
1301                         unit_id,
1302                         "EXECUTABLE=%s", command->path,
1303                         "MESSAGE=About to execute: %s", line,
1304                         NULL);
1305         free(line);
1306
1307         pid = fork();
1308         if (pid < 0)
1309                 return -errno;
1310
1311         if (pid == 0) {
1312                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1313                 const char *username = NULL, *home = NULL, *shell = NULL;
1314                 unsigned n_dont_close = 0;
1315                 int dont_close[n_fds + 3];
1316                 uid_t uid = (uid_t) -1;
1317                 gid_t gid = (gid_t) -1;
1318                 sigset_t ss;
1319                 int i, err;
1320
1321                 /* child */
1322
1323                 rename_process_from_path(command->path);
1324
1325                 /* We reset exactly these signals, since they are the
1326                  * only ones we set to SIG_IGN in the main daemon. All
1327                  * others we leave untouched because we set them to
1328                  * SIG_DFL or a valid handler initially, both of which
1329                  * will be demoted to SIG_DFL. */
1330                 default_signals(SIGNALS_CRASH_HANDLER,
1331                                 SIGNALS_IGNORE, -1);
1332
1333                 if (context->ignore_sigpipe)
1334                         ignore_signals(SIGPIPE, -1);
1335
1336                 assert_se(sigemptyset(&ss) == 0);
1337                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1338                         err = -errno;
1339                         r = EXIT_SIGNAL_MASK;
1340                         goto fail_child;
1341                 }
1342
1343                 if (idle_pipe)
1344                         do_idle_pipe_dance(idle_pipe);
1345
1346                 /* Close sockets very early to make sure we don't
1347                  * block init reexecution because it cannot bind its
1348                  * sockets */
1349                 log_forget_fds();
1350
1351                 if (socket_fd >= 0)
1352                         dont_close[n_dont_close++] = socket_fd;
1353                 if (n_fds > 0) {
1354                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1355                         n_dont_close += n_fds;
1356                 }
1357                 if (runtime) {
1358                         if (runtime->netns_storage_socket[0] >= 0)
1359                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1360                         if (runtime->netns_storage_socket[1] >= 0)
1361                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1362                 }
1363
1364                 err = close_all_fds(dont_close, n_dont_close);
1365                 if (err < 0) {
1366                         r = EXIT_FDS;
1367                         goto fail_child;
1368                 }
1369
1370                 if (!context->same_pgrp)
1371                         if (setsid() < 0) {
1372                                 err = -errno;
1373                                 r = EXIT_SETSID;
1374                                 goto fail_child;
1375                         }
1376
1377                 if (context->tcpwrap_name) {
1378                         if (socket_fd >= 0)
1379                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1380                                         err = -EACCES;
1381                                         r = EXIT_TCPWRAP;
1382                                         goto fail_child;
1383                                 }
1384
1385                         for (i = 0; i < (int) n_fds; i++) {
1386                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1387                                         err = -EACCES;
1388                                         r = EXIT_TCPWRAP;
1389                                         goto fail_child;
1390                                 }
1391                         }
1392                 }
1393
1394                 exec_context_tty_reset(context);
1395
1396                 if (confirm_spawn) {
1397                         char response;
1398
1399                         err = ask_for_confirmation(&response, argv);
1400                         if (err == -ETIMEDOUT)
1401                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1402                         else if (err < 0)
1403                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1404                         else if (response == 's') {
1405                                 write_confirm_message("Skipping execution.\n");
1406                                 err = -ECANCELED;
1407                                 r = EXIT_CONFIRM;
1408                                 goto fail_child;
1409                         } else if (response == 'n') {
1410                                 write_confirm_message("Failing execution.\n");
1411                                 err = r = 0;
1412                                 goto fail_child;
1413                         }
1414                 }
1415
1416                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1417                  * must sure to drop O_NONBLOCK */
1418                 if (socket_fd >= 0)
1419                         fd_nonblock(socket_fd, false);
1420
1421                 err = setup_input(context, socket_fd, apply_tty_stdin);
1422                 if (err < 0) {
1423                         r = EXIT_STDIN;
1424                         goto fail_child;
1425                 }
1426
1427                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1428                 if (err < 0) {
1429                         r = EXIT_STDOUT;
1430                         goto fail_child;
1431                 }
1432
1433                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1434                 if (err < 0) {
1435                         r = EXIT_STDERR;
1436                         goto fail_child;
1437                 }
1438
1439                 if (cgroup_path) {
1440                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1441                         if (err < 0) {
1442                                 r = EXIT_CGROUP;
1443                                 goto fail_child;
1444                         }
1445                 }
1446
1447                 if (context->oom_score_adjust_set) {
1448                         char t[16];
1449
1450                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1451                         char_array_0(t);
1452
1453                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1454                                 err = -errno;
1455                                 r = EXIT_OOM_ADJUST;
1456                                 goto fail_child;
1457                         }
1458                 }
1459
1460                 if (context->nice_set)
1461                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1462                                 err = -errno;
1463                                 r = EXIT_NICE;
1464                                 goto fail_child;
1465                         }
1466
1467                 if (context->cpu_sched_set) {
1468                         struct sched_param param = {
1469                                 .sched_priority = context->cpu_sched_priority,
1470                         };
1471
1472                         r = sched_setscheduler(0,
1473                                                context->cpu_sched_policy |
1474                                                (context->cpu_sched_reset_on_fork ?
1475                                                 SCHED_RESET_ON_FORK : 0),
1476                                                &param);
1477                         if (r < 0) {
1478                                 err = -errno;
1479                                 r = EXIT_SETSCHEDULER;
1480                                 goto fail_child;
1481                         }
1482                 }
1483
1484                 if (context->cpuset)
1485                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1486                                 err = -errno;
1487                                 r = EXIT_CPUAFFINITY;
1488                                 goto fail_child;
1489                         }
1490
1491                 if (context->ioprio_set)
1492                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1493                                 err = -errno;
1494                                 r = EXIT_IOPRIO;
1495                                 goto fail_child;
1496                         }
1497
1498                 if (context->timer_slack_nsec != (nsec_t) -1)
1499                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1500                                 err = -errno;
1501                                 r = EXIT_TIMERSLACK;
1502                                 goto fail_child;
1503                         }
1504
1505                 if (context->personality != 0xffffffffUL)
1506                         if (personality(context->personality) < 0) {
1507                                 err = -errno;
1508                                 r = EXIT_PERSONALITY;
1509                                 goto fail_child;
1510                         }
1511
1512                 if (context->utmp_id)
1513                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1514
1515                 if (context->user) {
1516                         username = context->user;
1517                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1518                         if (err < 0) {
1519                                 r = EXIT_USER;
1520                                 goto fail_child;
1521                         }
1522
1523                         if (is_terminal_input(context->std_input)) {
1524                                 err = chown_terminal(STDIN_FILENO, uid);
1525                                 if (err < 0) {
1526                                         r = EXIT_STDIN;
1527                                         goto fail_child;
1528                                 }
1529                         }
1530                 }
1531
1532 #ifdef HAVE_PAM
1533                 if (cgroup_path && context->user && context->pam_name) {
1534                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1535                         if (err < 0) {
1536                                 r = EXIT_CGROUP;
1537                                 goto fail_child;
1538                         }
1539
1540
1541                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1542                         if (err < 0) {
1543                                 r = EXIT_CGROUP;
1544                                 goto fail_child;
1545                         }
1546                 }
1547 #endif
1548
1549                 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1550                         char **rt;
1551
1552                         STRV_FOREACH(rt, context->runtime_directory) {
1553                                 _cleanup_free_ char *p;
1554
1555                                 p = strjoin(runtime_prefix, "/", *rt, NULL);
1556                                 if (!p) {
1557                                         r = EXIT_RUNTIME_DIRECTORY;
1558                                         err = -ENOMEM;
1559                                         goto fail_child;
1560                                 }
1561
1562                                 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1563                                 if (err < 0) {
1564                                         r = EXIT_RUNTIME_DIRECTORY;
1565                                         goto fail_child;
1566                                 }
1567                         }
1568                 }
1569
1570                 if (apply_permissions) {
1571                         err = enforce_groups(context, username, gid);
1572                         if (err < 0) {
1573                                 r = EXIT_GROUP;
1574                                 goto fail_child;
1575                         }
1576                 }
1577
1578                 umask(context->umask);
1579
1580 #ifdef HAVE_PAM
1581                 if (apply_permissions && context->pam_name && username) {
1582                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1583                         if (err < 0) {
1584                                 r = EXIT_PAM;
1585                                 goto fail_child;
1586                         }
1587                 }
1588 #endif
1589                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1590                         err = setup_netns(runtime->netns_storage_socket);
1591                         if (err < 0) {
1592                                 r = EXIT_NETWORK;
1593                                 goto fail_child;
1594                         }
1595                 }
1596
1597                 if (!strv_isempty(context->read_write_dirs) ||
1598                     !strv_isempty(context->read_only_dirs) ||
1599                     !strv_isempty(context->inaccessible_dirs) ||
1600                     context->mount_flags != 0 ||
1601                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1602                     context->private_devices) {
1603
1604                         char *tmp = NULL, *var = NULL;
1605
1606                         /* The runtime struct only contains the parent
1607                          * of the private /tmp, which is
1608                          * non-accessible to world users. Inside of it
1609                          * there's a /tmp that is sticky, and that's
1610                          * the one we want to use here. */
1611
1612                         if (context->private_tmp && runtime) {
1613                                 if (runtime->tmp_dir)
1614                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1615                                 if (runtime->var_tmp_dir)
1616                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1617                         }
1618
1619                         err = setup_namespace(
1620                                         context->read_write_dirs,
1621                                         context->read_only_dirs,
1622                                         context->inaccessible_dirs,
1623                                         tmp,
1624                                         var,
1625                                         context->private_devices,
1626                                         context->mount_flags);
1627
1628                         if (err < 0) {
1629                                 r = EXIT_NAMESPACE;
1630                                 goto fail_child;
1631                         }
1632                 }
1633
1634                 if (apply_chroot) {
1635                         if (context->root_directory)
1636                                 if (chroot(context->root_directory) < 0) {
1637                                         err = -errno;
1638                                         r = EXIT_CHROOT;
1639                                         goto fail_child;
1640                                 }
1641
1642                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1643                                 err = -errno;
1644                                 r = EXIT_CHDIR;
1645                                 goto fail_child;
1646                         }
1647                 } else {
1648                         _cleanup_free_ char *d = NULL;
1649
1650                         if (asprintf(&d, "%s/%s",
1651                                      context->root_directory ? context->root_directory : "",
1652                                      context->working_directory ? context->working_directory : "") < 0) {
1653                                 err = -ENOMEM;
1654                                 r = EXIT_MEMORY;
1655                                 goto fail_child;
1656                         }
1657
1658                         if (chdir(d) < 0) {
1659                                 err = -errno;
1660                                 r = EXIT_CHDIR;
1661                                 goto fail_child;
1662                         }
1663                 }
1664
1665                 /* We repeat the fd closing here, to make sure that
1666                  * nothing is leaked from the PAM modules */
1667                 err = close_all_fds(fds, n_fds);
1668                 if (err >= 0)
1669                         err = shift_fds(fds, n_fds);
1670                 if (err >= 0)
1671                         err = flags_fds(fds, n_fds, context->non_blocking);
1672                 if (err < 0) {
1673                         r = EXIT_FDS;
1674                         goto fail_child;
1675                 }
1676
1677                 if (apply_permissions) {
1678
1679                         for (i = 0; i < _RLIMIT_MAX; i++) {
1680                                 if (!context->rlimit[i])
1681                                         continue;
1682
1683                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1684                                         err = -errno;
1685                                         r = EXIT_LIMITS;
1686                                         goto fail_child;
1687                                 }
1688                         }
1689
1690                         if (context->capability_bounding_set_drop) {
1691                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1692                                 if (err < 0) {
1693                                         r = EXIT_CAPABILITIES;
1694                                         goto fail_child;
1695                                 }
1696                         }
1697
1698                         if (context->user) {
1699                                 err = enforce_user(context, uid);
1700                                 if (err < 0) {
1701                                         r = EXIT_USER;
1702                                         goto fail_child;
1703                                 }
1704                         }
1705
1706                         /* PR_GET_SECUREBITS is not privileged, while
1707                          * PR_SET_SECUREBITS is. So to suppress
1708                          * potential EPERMs we'll try not to call
1709                          * PR_SET_SECUREBITS unless necessary. */
1710                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1711                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1712                                         err = -errno;
1713                                         r = EXIT_SECUREBITS;
1714                                         goto fail_child;
1715                                 }
1716
1717                         if (context->capabilities)
1718                                 if (cap_set_proc(context->capabilities) < 0) {
1719                                         err = -errno;
1720                                         r = EXIT_CAPABILITIES;
1721                                         goto fail_child;
1722                                 }
1723
1724                         if (context->no_new_privileges)
1725                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1726                                         err = -errno;
1727                                         r = EXIT_NO_NEW_PRIVILEGES;
1728                                         goto fail_child;
1729                                 }
1730
1731 #ifdef HAVE_SECCOMP
1732                         if (context->address_families_whitelist ||
1733                             !set_isempty(context->address_families)) {
1734                                 err = apply_address_families(context);
1735                                 if (err < 0) {
1736                                         r = EXIT_ADDRESS_FAMILIES;
1737                                         goto fail_child;
1738                                 }
1739                         }
1740
1741                         if (context->syscall_whitelist ||
1742                             !set_isempty(context->syscall_filter) ||
1743                             !set_isempty(context->syscall_archs)) {
1744                                 err = apply_seccomp(context);
1745                                 if (err < 0) {
1746                                         r = EXIT_SECCOMP;
1747                                         goto fail_child;
1748                                 }
1749                         }
1750 #endif
1751
1752 #ifdef HAVE_SELINUX
1753                         if (context->selinux_context && use_selinux()) {
1754                                 err = setexeccon(context->selinux_context);
1755                                 if (err < 0 && !context->selinux_context_ignore) {
1756                                         r = EXIT_SELINUX_CONTEXT;
1757                                         goto fail_child;
1758                                 }
1759                         }
1760 #endif
1761
1762 #ifdef HAVE_APPARMOR
1763                         if (context->apparmor_profile && use_apparmor()) {
1764                                 err = aa_change_onexec(context->apparmor_profile);
1765                                 if (err < 0 && !context->apparmor_profile_ignore) {
1766                                         r = EXIT_APPARMOR_PROFILE;
1767                                         goto fail_child;
1768                                 }
1769                         }
1770 #endif
1771                 }
1772
1773                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1774                 if (r < 0) {
1775                         r = EXIT_MEMORY;
1776                         goto fail_child;
1777                 }
1778
1779                 final_env = strv_env_merge(5,
1780                                            environment,
1781                                            our_env,
1782                                            context->environment,
1783                                            files_env,
1784                                            pam_env,
1785                                            NULL);
1786                 if (!final_env) {
1787                         err = -ENOMEM;
1788                         r = EXIT_MEMORY;
1789                         goto fail_child;
1790                 }
1791
1792                 final_argv = replace_env_argv(argv, final_env);
1793                 if (!final_argv) {
1794                         err = -ENOMEM;
1795                         r = EXIT_MEMORY;
1796                         goto fail_child;
1797                 }
1798
1799                 final_env = strv_env_clean(final_env);
1800
1801                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1802                         line = exec_command_line(final_argv);
1803                         if (line) {
1804                                 log_open();
1805                                 log_struct_unit(LOG_DEBUG,
1806                                                 unit_id,
1807                                                 "EXECUTABLE=%s", command->path,
1808                                                 "MESSAGE=Executing: %s", line,
1809                                                 NULL);
1810                                 log_close();
1811                                 free(line);
1812                                 line = NULL;
1813                         }
1814                 }
1815                 execve(command->path, final_argv, final_env);
1816                 err = -errno;
1817                 r = EXIT_EXEC;
1818
1819         fail_child:
1820                 if (r != 0) {
1821                         log_open();
1822                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1823                                    "EXECUTABLE=%s", command->path,
1824                                    "MESSAGE=Failed at step %s spawning %s: %s",
1825                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1826                                           command->path, strerror(-err),
1827                                    "ERRNO=%d", -err,
1828                                    NULL);
1829                         log_close();
1830                 }
1831
1832                 _exit(r);
1833         }
1834
1835         log_struct_unit(LOG_DEBUG,
1836                         unit_id,
1837                         "MESSAGE=Forked %s as "PID_FMT,
1838                         command->path, pid,
1839                         NULL);
1840
1841         /* We add the new process to the cgroup both in the child (so
1842          * that we can be sure that no user code is ever executed
1843          * outside of the cgroup) and in the parent (so that we can be
1844          * sure that when we kill the cgroup the process will be
1845          * killed too). */
1846         if (cgroup_path)
1847                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1848
1849         exec_status_start(&command->exec_status, pid);
1850
1851         *ret = pid;
1852         return 0;
1853 }
1854
1855 void exec_context_init(ExecContext *c) {
1856         assert(c);
1857
1858         c->umask = 0022;
1859         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1860         c->cpu_sched_policy = SCHED_OTHER;
1861         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1862         c->syslog_level_prefix = true;
1863         c->ignore_sigpipe = true;
1864         c->timer_slack_nsec = (nsec_t) -1;
1865         c->personality = 0xffffffffUL;
1866         c->runtime_directory_mode = 0755;
1867 }
1868
1869 void exec_context_done(ExecContext *c) {
1870         unsigned l;
1871
1872         assert(c);
1873
1874         strv_free(c->environment);
1875         c->environment = NULL;
1876
1877         strv_free(c->environment_files);
1878         c->environment_files = NULL;
1879
1880         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1881                 free(c->rlimit[l]);
1882                 c->rlimit[l] = NULL;
1883         }
1884
1885         free(c->working_directory);
1886         c->working_directory = NULL;
1887         free(c->root_directory);
1888         c->root_directory = NULL;
1889
1890         free(c->tty_path);
1891         c->tty_path = NULL;
1892
1893         free(c->tcpwrap_name);
1894         c->tcpwrap_name = NULL;
1895
1896         free(c->syslog_identifier);
1897         c->syslog_identifier = NULL;
1898
1899         free(c->user);
1900         c->user = NULL;
1901
1902         free(c->group);
1903         c->group = NULL;
1904
1905         strv_free(c->supplementary_groups);
1906         c->supplementary_groups = NULL;
1907
1908         free(c->pam_name);
1909         c->pam_name = NULL;
1910
1911         if (c->capabilities) {
1912                 cap_free(c->capabilities);
1913                 c->capabilities = NULL;
1914         }
1915
1916         strv_free(c->read_only_dirs);
1917         c->read_only_dirs = NULL;
1918
1919         strv_free(c->read_write_dirs);
1920         c->read_write_dirs = NULL;
1921
1922         strv_free(c->inaccessible_dirs);
1923         c->inaccessible_dirs = NULL;
1924
1925         if (c->cpuset)
1926                 CPU_FREE(c->cpuset);
1927
1928         free(c->utmp_id);
1929         c->utmp_id = NULL;
1930
1931         free(c->selinux_context);
1932         c->selinux_context = NULL;
1933
1934         free(c->apparmor_profile);
1935         c->apparmor_profile = NULL;
1936
1937         set_free(c->syscall_filter);
1938         c->syscall_filter = NULL;
1939
1940         set_free(c->syscall_archs);
1941         c->syscall_archs = NULL;
1942
1943         set_free(c->address_families);
1944         c->address_families = NULL;
1945
1946         strv_free(c->runtime_directory);
1947         c->runtime_directory = NULL;
1948 }
1949
1950 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1951         char **i;
1952
1953         assert(c);
1954
1955         if (!runtime_prefix)
1956                 return 0;
1957
1958         STRV_FOREACH(i, c->runtime_directory) {
1959                 _cleanup_free_ char *p;
1960
1961                 p = strjoin(runtime_prefix, "/", *i, NULL);
1962                 if (!p)
1963                         return -ENOMEM;
1964
1965                 /* We execute this synchronously, since we need to be
1966                  * sure this is gone when we start the service
1967                  * next. */
1968                 rm_rf_dangerous(p, false, true, false);
1969         }
1970
1971         return 0;
1972 }
1973
1974 void exec_command_done(ExecCommand *c) {
1975         assert(c);
1976
1977         free(c->path);
1978         c->path = NULL;
1979
1980         strv_free(c->argv);
1981         c->argv = NULL;
1982 }
1983
1984 void exec_command_done_array(ExecCommand *c, unsigned n) {
1985         unsigned i;
1986
1987         for (i = 0; i < n; i++)
1988                 exec_command_done(c+i);
1989 }
1990
1991 void exec_command_free_list(ExecCommand *c) {
1992         ExecCommand *i;
1993
1994         while ((i = c)) {
1995                 LIST_REMOVE(command, c, i);
1996                 exec_command_done(i);
1997                 free(i);
1998         }
1999 }
2000
2001 void exec_command_free_array(ExecCommand **c, unsigned n) {
2002         unsigned i;
2003
2004         for (i = 0; i < n; i++) {
2005                 exec_command_free_list(c[i]);
2006                 c[i] = NULL;
2007         }
2008 }
2009
2010 int exec_context_load_environment(const ExecContext *c, char ***l) {
2011         char **i, **r = NULL;
2012
2013         assert(c);
2014         assert(l);
2015
2016         STRV_FOREACH(i, c->environment_files) {
2017                 char *fn;
2018                 int k;
2019                 bool ignore = false;
2020                 char **p;
2021                 _cleanup_globfree_ glob_t pglob = {};
2022                 int count, n;
2023
2024                 fn = *i;
2025
2026                 if (fn[0] == '-') {
2027                         ignore = true;
2028                         fn ++;
2029                 }
2030
2031                 if (!path_is_absolute(fn)) {
2032                         if (ignore)
2033                                 continue;
2034
2035                         strv_free(r);
2036                         return -EINVAL;
2037                 }
2038
2039                 /* Filename supports globbing, take all matching files */
2040                 errno = 0;
2041                 if (glob(fn, 0, NULL, &pglob) != 0) {
2042                         if (ignore)
2043                                 continue;
2044
2045                         strv_free(r);
2046                         return errno ? -errno : -EINVAL;
2047                 }
2048                 count = pglob.gl_pathc;
2049                 if (count == 0) {
2050                         if (ignore)
2051                                 continue;
2052
2053                         strv_free(r);
2054                         return -EINVAL;
2055                 }
2056                 for (n = 0; n < count; n++) {
2057                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2058                         if (k < 0) {
2059                                 if (ignore)
2060                                         continue;
2061
2062                                 strv_free(r);
2063                                 return k;
2064                         }
2065                         /* Log invalid environment variables with filename */
2066                         if (p)
2067                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2068
2069                         if (r == NULL)
2070                                 r = p;
2071                         else {
2072                                 char **m;
2073
2074                                 m = strv_env_merge(2, r, p);
2075                                 strv_free(r);
2076                                 strv_free(p);
2077                                 if (!m)
2078                                         return -ENOMEM;
2079
2080                                 r = m;
2081                         }
2082                 }
2083         }
2084
2085         *l = r;
2086
2087         return 0;
2088 }
2089
/* Checks whether the given TTY (with or without a "/dev/" prefix) might
 * be the same device as /dev/console. Errs on the side of "yes": if the
 * console cannot be resolved, true is returned. */
static bool tty_may_match_dev_console(const char *tty) {
        char *buffer = NULL, *console;
        bool match;

        /* Compare device names without the "/dev/" prefix */
        if (startswith(tty, "/dev/"))
                tty += strlen("/dev/");

        /* Trivially the same device */
        if (streq(tty, "console"))
                return true;

        console = resolve_dev_console(&buffer);
        if (!console)
                /* Resolution failed, so we cannot rule it out */
                return true;

        if (streq(console, tty))
                match = true;
        else
                /* "tty0" is whatever VC is currently active, hence any
                 * VC may coincide with it at times */
                match = streq(console, "tty0") && tty_is_vc(tty);

        free(buffer);

        return match;
}
2112
2113 bool exec_context_may_touch_console(ExecContext *ec) {
2114         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2115                 is_terminal_input(ec->std_input) ||
2116                 is_terminal_output(ec->std_output) ||
2117                 is_terminal_output(ec->std_error)) &&
2118                tty_may_match_dev_console(tty_path(ec));
2119 }
2120
/* Writes every string of the NULL-terminated vector l to f, each one
 * preceded by a single space. A NULL vector writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        assert(f);

        if (!l)
                return;

        for (; *l; l++) {
                fputc(' ', f);
                fputs(*l, f);
        }
}
2129
2130 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2131         char **e;
2132         unsigned i;
2133
2134         assert(c);
2135         assert(f);
2136
2137         prefix = strempty(prefix);
2138
2139         fprintf(f,
2140                 "%sUMask: %04o\n"
2141                 "%sWorkingDirectory: %s\n"
2142                 "%sRootDirectory: %s\n"
2143                 "%sNonBlocking: %s\n"
2144                 "%sPrivateTmp: %s\n"
2145                 "%sPrivateNetwork: %s\n"
2146                 "%sPrivateDevices: %s\n"
2147                 "%sIgnoreSIGPIPE: %s\n",
2148                 prefix, c->umask,
2149                 prefix, c->working_directory ? c->working_directory : "/",
2150                 prefix, c->root_directory ? c->root_directory : "/",
2151                 prefix, yes_no(c->non_blocking),
2152                 prefix, yes_no(c->private_tmp),
2153                 prefix, yes_no(c->private_network),
2154                 prefix, yes_no(c->private_devices),
2155                 prefix, yes_no(c->ignore_sigpipe));
2156
2157         STRV_FOREACH(e, c->environment)
2158                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2159
2160         STRV_FOREACH(e, c->environment_files)
2161                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2162
2163         if (c->tcpwrap_name)
2164                 fprintf(f,
2165                         "%sTCPWrapName: %s\n",
2166                         prefix, c->tcpwrap_name);
2167
2168         if (c->nice_set)
2169                 fprintf(f,
2170                         "%sNice: %i\n",
2171                         prefix, c->nice);
2172
2173         if (c->oom_score_adjust_set)
2174                 fprintf(f,
2175                         "%sOOMScoreAdjust: %i\n",
2176                         prefix, c->oom_score_adjust);
2177
2178         for (i = 0; i < RLIM_NLIMITS; i++)
2179                 if (c->rlimit[i])
2180                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
2181
2182         if (c->ioprio_set) {
2183                 _cleanup_free_ char *class_str = NULL;
2184
2185                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2186                 fprintf(f,
2187                         "%sIOSchedulingClass: %s\n"
2188                         "%sIOPriority: %i\n",
2189                         prefix, strna(class_str),
2190                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2191         }
2192
2193         if (c->cpu_sched_set) {
2194                 _cleanup_free_ char *policy_str = NULL;
2195
2196                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2197                 fprintf(f,
2198                         "%sCPUSchedulingPolicy: %s\n"
2199                         "%sCPUSchedulingPriority: %i\n"
2200                         "%sCPUSchedulingResetOnFork: %s\n",
2201                         prefix, strna(policy_str),
2202                         prefix, c->cpu_sched_priority,
2203                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2204         }
2205
2206         if (c->cpuset) {
2207                 fprintf(f, "%sCPUAffinity:", prefix);
2208                 for (i = 0; i < c->cpuset_ncpus; i++)
2209                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2210                                 fprintf(f, " %u", i);
2211                 fputs("\n", f);
2212         }
2213
2214         if (c->timer_slack_nsec != (nsec_t) -1)
2215                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2216
2217         fprintf(f,
2218                 "%sStandardInput: %s\n"
2219                 "%sStandardOutput: %s\n"
2220                 "%sStandardError: %s\n",
2221                 prefix, exec_input_to_string(c->std_input),
2222                 prefix, exec_output_to_string(c->std_output),
2223                 prefix, exec_output_to_string(c->std_error));
2224
2225         if (c->tty_path)
2226                 fprintf(f,
2227                         "%sTTYPath: %s\n"
2228                         "%sTTYReset: %s\n"
2229                         "%sTTYVHangup: %s\n"
2230                         "%sTTYVTDisallocate: %s\n",
2231                         prefix, c->tty_path,
2232                         prefix, yes_no(c->tty_reset),
2233                         prefix, yes_no(c->tty_vhangup),
2234                         prefix, yes_no(c->tty_vt_disallocate));
2235
2236         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2237             c->std_output == EXEC_OUTPUT_KMSG ||
2238             c->std_output == EXEC_OUTPUT_JOURNAL ||
2239             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2240             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2241             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2242             c->std_error == EXEC_OUTPUT_SYSLOG ||
2243             c->std_error == EXEC_OUTPUT_KMSG ||
2244             c->std_error == EXEC_OUTPUT_JOURNAL ||
2245             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2246             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2247             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2248
2249                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2250
2251                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2252                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2253
2254                 fprintf(f,
2255                         "%sSyslogFacility: %s\n"
2256                         "%sSyslogLevel: %s\n",
2257                         prefix, strna(fac_str),
2258                         prefix, strna(lvl_str));
2259         }
2260
2261         if (c->capabilities) {
2262                 _cleanup_cap_free_charp_ char *t;
2263
2264                 t = cap_to_text(c->capabilities, NULL);
2265                 if (t)
2266                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2267         }
2268
2269         if (c->secure_bits)
2270                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2271                         prefix,
2272                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2273                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2274                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2275                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2276                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2277                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2278
2279         if (c->capability_bounding_set_drop) {
2280                 unsigned long l;
2281                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2282
2283                 for (l = 0; l <= cap_last_cap(); l++)
2284                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2285                                 _cleanup_cap_free_charp_ char *t;
2286
2287                                 t = cap_to_name(l);
2288                                 if (t)
2289                                         fprintf(f, " %s", t);
2290                         }
2291
2292                 fputs("\n", f);
2293         }
2294
2295         if (c->user)
2296                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2297         if (c->group)
2298                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2299
2300         if (strv_length(c->supplementary_groups) > 0) {
2301                 fprintf(f, "%sSupplementaryGroups:", prefix);
2302                 strv_fprintf(f, c->supplementary_groups);
2303                 fputs("\n", f);
2304         }
2305
2306         if (c->pam_name)
2307                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2308
2309         if (strv_length(c->read_write_dirs) > 0) {
2310                 fprintf(f, "%sReadWriteDirs:", prefix);
2311                 strv_fprintf(f, c->read_write_dirs);
2312                 fputs("\n", f);
2313         }
2314
2315         if (strv_length(c->read_only_dirs) > 0) {
2316                 fprintf(f, "%sReadOnlyDirs:", prefix);
2317                 strv_fprintf(f, c->read_only_dirs);
2318                 fputs("\n", f);
2319         }
2320
2321         if (strv_length(c->inaccessible_dirs) > 0) {
2322                 fprintf(f, "%sInaccessibleDirs:", prefix);
2323                 strv_fprintf(f, c->inaccessible_dirs);
2324                 fputs("\n", f);
2325         }
2326
2327         if (c->utmp_id)
2328                 fprintf(f,
2329                         "%sUtmpIdentifier: %s\n",
2330                         prefix, c->utmp_id);
2331
2332         if (c->selinux_context)
2333                 fprintf(f,
2334                         "%sSELinuxContext: %s%s\n",
2335                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2336
2337         if (c->personality != 0xffffffffUL)
2338                 fprintf(f,
2339                         "%sPersonality: %s\n",
2340                         prefix, strna(personality_to_string(c->personality)));
2341
2342         if (c->syscall_filter) {
2343 #ifdef HAVE_SECCOMP
2344                 Iterator j;
2345                 void *id;
2346                 bool first = true;
2347 #endif
2348
2349                 fprintf(f,
2350                         "%sSystemCallFilter: ",
2351                         prefix);
2352
2353                 if (!c->syscall_whitelist)
2354                         fputc('~', f);
2355
2356 #ifdef HAVE_SECCOMP
2357                 SET_FOREACH(id, c->syscall_filter, j) {
2358                         _cleanup_free_ char *name = NULL;
2359
2360                         if (first)
2361                                 first = false;
2362                         else
2363                                 fputc(' ', f);
2364
2365                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2366                         fputs(strna(name), f);
2367                 }
2368 #endif
2369
2370                 fputc('\n', f);
2371         }
2372
2373         if (c->syscall_archs) {
2374 #ifdef HAVE_SECCOMP
2375                 Iterator j;
2376                 void *id;
2377 #endif
2378
2379                 fprintf(f,
2380                         "%sSystemCallArchitectures:",
2381                         prefix);
2382
2383 #ifdef HAVE_SECCOMP
2384                 SET_FOREACH(id, c->syscall_archs, j)
2385                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2386 #endif
2387                 fputc('\n', f);
2388         }
2389
2390         if (c->syscall_errno != 0)
2391                 fprintf(f,
2392                         "%sSystemCallErrorNumber: %s\n",
2393                         prefix, strna(errno_to_name(c->syscall_errno)));
2394
2395         if (c->apparmor_profile)
2396                 fprintf(f,
2397                         "%sAppArmorProfile: %s%s\n",
2398                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2399 }
2400
2401 void exec_status_start(ExecStatus *s, pid_t pid) {
2402         assert(s);
2403
2404         zero(*s);
2405         s->pid = pid;
2406         dual_timestamp_get(&s->start_timestamp);
2407 }
2408
2409 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2410         assert(s);
2411
2412         if (s->pid && s->pid != pid)
2413                 zero(*s);
2414
2415         s->pid = pid;
2416         dual_timestamp_get(&s->exit_timestamp);
2417
2418         s->code = code;
2419         s->status = status;
2420
2421         if (context) {
2422                 if (context->utmp_id)
2423                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2424
2425                 exec_context_tty_reset(context);
2426         }
2427 }
2428
2429 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2430         char buf[FORMAT_TIMESTAMP_MAX];
2431
2432         assert(s);
2433         assert(f);
2434
2435         if (!prefix)
2436                 prefix = "";
2437
2438         if (s->pid <= 0)
2439                 return;
2440
2441         fprintf(f,
2442                 "%sPID: "PID_FMT"\n",
2443                 prefix, s->pid);
2444
2445         if (s->start_timestamp.realtime > 0)
2446                 fprintf(f,
2447                         "%sStart Timestamp: %s\n",
2448                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2449
2450         if (s->exit_timestamp.realtime > 0)
2451                 fprintf(f,
2452                         "%sExit Timestamp: %s\n"
2453                         "%sExit Code: %s\n"
2454                         "%sExit Status: %i\n",
2455                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2456                         prefix, sigchld_code_to_string(s->code),
2457                         prefix, s->status);
2458 }
2459
2460 char *exec_command_line(char **argv) {
2461         size_t k;
2462         char *n, *p, **a;
2463         bool first = true;
2464
2465         assert(argv);
2466
2467         k = 1;
2468         STRV_FOREACH(a, argv)
2469                 k += strlen(*a)+3;
2470
2471         if (!(n = new(char, k)))
2472                 return NULL;
2473
2474         p = n;
2475         STRV_FOREACH(a, argv) {
2476
2477                 if (!first)
2478                         *(p++) = ' ';
2479                 else
2480                         first = false;
2481
2482                 if (strpbrk(*a, WHITESPACE)) {
2483                         *(p++) = '\'';
2484                         p = stpcpy(p, *a);
2485                         *(p++) = '\'';
2486                 } else
2487                         p = stpcpy(p, *a);
2488
2489         }
2490
2491         *p = 0;
2492
2493         /* FIXME: this doesn't really handle arguments that have
2494          * spaces and ticks in them */
2495
2496         return n;
2497 }
2498
2499 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2500         char *p2;
2501         const char *prefix2;
2502
2503         char *cmd;
2504
2505         assert(c);
2506         assert(f);
2507
2508         if (!prefix)
2509                 prefix = "";
2510         p2 = strappend(prefix, "\t");
2511         prefix2 = p2 ? p2 : prefix;
2512
2513         cmd = exec_command_line(c->argv);
2514
2515         fprintf(f,
2516                 "%sCommand Line: %s\n",
2517                 prefix, cmd ? cmd : strerror(ENOMEM));
2518
2519         free(cmd);
2520
2521         exec_status_dump(&c->exec_status, f, prefix2);
2522
2523         free(p2);
2524 }
2525
2526 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2527         assert(f);
2528
2529         if (!prefix)
2530                 prefix = "";
2531
2532         LIST_FOREACH(command, c, c)
2533                 exec_command_dump(c, f, prefix);
2534 }
2535
2536 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2537         ExecCommand *end;
2538
2539         assert(l);
2540         assert(e);
2541
2542         if (*l) {
2543                 /* It's kind of important, that we keep the order here */
2544                 LIST_FIND_TAIL(command, *l, end);
2545                 LIST_INSERT_AFTER(command, *l, end, e);
2546         } else
2547               *l = e;
2548 }
2549
2550 int exec_command_set(ExecCommand *c, const char *path, ...) {
2551         va_list ap;
2552         char **l, *p;
2553
2554         assert(c);
2555         assert(path);
2556
2557         va_start(ap, path);
2558         l = strv_new_ap(path, ap);
2559         va_end(ap);
2560
2561         if (!l)
2562                 return -ENOMEM;
2563
2564         p = strdup(path);
2565         if (!p) {
2566                 strv_free(l);
2567                 return -ENOMEM;
2568         }
2569
2570         free(c->path);
2571         c->path = p;
2572
2573         strv_free(c->argv);
2574         c->argv = l;
2575
2576         return 0;
2577 }
2578
2579 static int exec_runtime_allocate(ExecRuntime **rt) {
2580
2581         if (*rt)
2582                 return 0;
2583
2584         *rt = new0(ExecRuntime, 1);
2585         if (!*rt)
2586                 return -ENOMEM;
2587
2588         (*rt)->n_ref = 1;
2589         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2590
2591         return 0;
2592 }
2593
2594 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2595         int r;
2596
2597         assert(rt);
2598         assert(c);
2599         assert(id);
2600
2601         if (*rt)
2602                 return 1;
2603
2604         if (!c->private_network && !c->private_tmp)
2605                 return 0;
2606
2607         r = exec_runtime_allocate(rt);
2608         if (r < 0)
2609                 return r;
2610
2611         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2612                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2613                         return -errno;
2614         }
2615
2616         if (c->private_tmp && !(*rt)->tmp_dir) {
2617                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2618                 if (r < 0)
2619                         return r;
2620         }
2621
2622         return 1;
2623 }
2624
2625 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2626         assert(r);
2627         assert(r->n_ref > 0);
2628
2629         r->n_ref++;
2630         return r;
2631 }
2632
2633 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2634
2635         if (!r)
2636                 return NULL;
2637
2638         assert(r->n_ref > 0);
2639
2640         r->n_ref--;
2641         if (r->n_ref <= 0) {
2642                 free(r->tmp_dir);
2643                 free(r->var_tmp_dir);
2644                 close_pipe(r->netns_storage_socket);
2645                 free(r);
2646         }
2647
2648         return NULL;
2649 }
2650
2651 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2652         assert(u);
2653         assert(f);
2654         assert(fds);
2655
2656         if (!rt)
2657                 return 0;
2658
2659         if (rt->tmp_dir)
2660                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2661
2662         if (rt->var_tmp_dir)
2663                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2664
2665         if (rt->netns_storage_socket[0] >= 0) {
2666                 int copy;
2667
2668                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2669                 if (copy < 0)
2670                         return copy;
2671
2672                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2673         }
2674
2675         if (rt->netns_storage_socket[1] >= 0) {
2676                 int copy;
2677
2678                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2679                 if (copy < 0)
2680                         return copy;
2681
2682                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2683         }
2684
2685         return 0;
2686 }
2687
2688 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2689         int r;
2690
2691         assert(rt);
2692         assert(key);
2693         assert(value);
2694
2695         if (streq(key, "tmp-dir")) {
2696                 char *copy;
2697
2698                 r = exec_runtime_allocate(rt);
2699                 if (r < 0)
2700                         return r;
2701
2702                 copy = strdup(value);
2703                 if (!copy)
2704                         return log_oom();
2705
2706                 free((*rt)->tmp_dir);
2707                 (*rt)->tmp_dir = copy;
2708
2709         } else if (streq(key, "var-tmp-dir")) {
2710                 char *copy;
2711
2712                 r = exec_runtime_allocate(rt);
2713                 if (r < 0)
2714                         return r;
2715
2716                 copy = strdup(value);
2717                 if (!copy)
2718                         return log_oom();
2719
2720                 free((*rt)->var_tmp_dir);
2721                 (*rt)->var_tmp_dir = copy;
2722
2723         } else if (streq(key, "netns-socket-0")) {
2724                 int fd;
2725
2726                 r = exec_runtime_allocate(rt);
2727                 if (r < 0)
2728                         return r;
2729
2730                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2731                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2732                 else {
2733                         if ((*rt)->netns_storage_socket[0] >= 0)
2734                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2735
2736                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2737                 }
2738         } else if (streq(key, "netns-socket-1")) {
2739                 int fd;
2740
2741                 r = exec_runtime_allocate(rt);
2742                 if (r < 0)
2743                         return r;
2744
2745                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2746                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2747                 else {
2748                         if ((*rt)->netns_storage_socket[1] >= 0)
2749                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2750
2751                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2752                 }
2753         } else
2754                 return 0;
2755
2756         return 1;
2757 }
2758
/* Job entry point: p is a heap-allocated path string whose ownership
 * passes to this function. The path is handed to rm_rf_dangerous() and
 * the string is freed afterwards. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2765
2766 void exec_runtime_destroy(ExecRuntime *rt) {
2767         int r;
2768
2769         if (!rt)
2770                 return;
2771
2772         /* If there are multiple users of this, let's leave the stuff around */
2773         if (rt->n_ref > 1)
2774                 return;
2775
2776         if (rt->tmp_dir) {
2777                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2778
2779                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2780                 if (r < 0) {
2781                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2782                         free(rt->tmp_dir);
2783                 }
2784
2785                 rt->tmp_dir = NULL;
2786         }
2787
2788         if (rt->var_tmp_dir) {
2789                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2790
2791                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2792                 if (r < 0) {
2793                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2794                         free(rt->var_tmp_dir);
2795                 }
2796
2797                 rt->var_tmp_dir = NULL;
2798         }
2799
2800         close_pipe(rt->netns_storage_socket);
2801 }
2802
2803 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2804         [EXEC_INPUT_NULL] = "null",
2805         [EXEC_INPUT_TTY] = "tty",
2806         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2807         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2808         [EXEC_INPUT_SOCKET] = "socket"
2809 };
2810
2811 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2812
2813 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2814         [EXEC_OUTPUT_INHERIT] = "inherit",
2815         [EXEC_OUTPUT_NULL] = "null",
2816         [EXEC_OUTPUT_TTY] = "tty",
2817         [EXEC_OUTPUT_SYSLOG] = "syslog",
2818         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2819         [EXEC_OUTPUT_KMSG] = "kmsg",
2820         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2821         [EXEC_OUTPUT_JOURNAL] = "journal",
2822         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2823         [EXEC_OUTPUT_SOCKET] = "socket"
2824 };
2825
2826 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);