chiark / gitweb /
core: fix detection of dead processes
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <libgen.h>
43 #undef basename
44
45 #ifdef HAVE_PAM
46 #include <security/pam_appl.h>
47 #endif
48
49 #ifdef HAVE_SELINUX
50 #include <selinux/selinux.h>
51 #endif
52
53 #ifdef HAVE_SECCOMP
54 #include <seccomp.h>
55 #endif
56
57 #include "execute.h"
58 #include "strv.h"
59 #include "macro.h"
60 #include "capability.h"
61 #include "util.h"
62 #include "log.h"
63 #include "sd-messages.h"
64 #include "ioprio.h"
65 #include "securebits.h"
66 #include "namespace.h"
67 #include "tcpwrap.h"
68 #include "exit-status.h"
69 #include "missing.h"
70 #include "utmp-wtmp.h"
71 #include "def.h"
72 #include "path-util.h"
73 #include "env-util.h"
74 #include "fileio.h"
75 #include "unit.h"
76 #include "async.h"
77 #include "selinux-util.h"
78 #include "errno-list.h"
79
80 #ifdef HAVE_SECCOMP
81 #include "seccomp-util.h"
82 #endif
83
/* Timeouts used while waiting on the idle pipe: the first wait, and the
 * shorter follow-up wait after we announced that we want to continue. */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode enforced on terminals we hand out.
 * This assumes there is a 'tty' group. */
#define TTY_MODE 0620

/* Send buffer size requested for the logger socket (8 MiB). */
#define SNDBUF_SIZE (8 * 1024 * 1024)
91
/* Moves the passed file descriptors so that fds[i] ends up being descriptor
 * number i + 3. The array is modified in place.
 *
 * Returns 0 on success (also when n_fds is 0), or a negative errno value if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if the slot already holds the right fd */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The target number was still taken, so we got a
                         * higher one. Remember the first such index and
                         * make another pass starting there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
135
/* Applies the requested O_NONBLOCK setting to each of the n_fds descriptors
 * and clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be passed on to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
162
163 _pure_ static const char *tty_path(const ExecContext *context) {
164         assert(context);
165
166         if (context->tty_path)
167                 return context->tty_path;
168
169         return "/dev/console";
170 }
171
172 static void exec_context_tty_reset(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_vhangup)
176                 terminal_vhangup(tty_path(context));
177
178         if (context->tty_reset)
179                 reset_terminal(tty_path(context));
180
181         if (context->tty_vt_disallocate && context->tty_path)
182                 vt_disallocate(context->tty_path);
183 }
184
185 static bool is_terminal_output(ExecOutput o) {
186         return
187                 o == EXEC_OUTPUT_TTY ||
188                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
189                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
190                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
191 }
192
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and installs
 * it as file descriptor nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already landed on the requested number, nothing to move */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(null_fd);

        return r;
}
210
211 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
212         int fd, r;
213         union sockaddr_union sa = {
214                 .un.sun_family = AF_UNIX,
215                 .un.sun_path = "/run/systemd/journal/stdout",
216         };
217
218         assert(context);
219         assert(output < _EXEC_OUTPUT_MAX);
220         assert(ident);
221         assert(nfd >= 0);
222
223         fd = socket(AF_UNIX, SOCK_STREAM, 0);
224         if (fd < 0)
225                 return -errno;
226
227         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
228         if (r < 0) {
229                 close_nointr_nofail(fd);
230                 return -errno;
231         }
232
233         if (shutdown(fd, SHUT_RD) < 0) {
234                 close_nointr_nofail(fd);
235                 return -errno;
236         }
237
238         fd_inc_sndbuf(fd, SNDBUF_SIZE);
239
240         dprintf(fd,
241                 "%s\n"
242                 "%s\n"
243                 "%i\n"
244                 "%i\n"
245                 "%i\n"
246                 "%i\n"
247                 "%i\n",
248                 context->syslog_identifier ? context->syslog_identifier : ident,
249                 unit_id,
250                 context->syslog_priority,
251                 !!context->syslog_level_prefix,
252                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
253                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
254                 is_terminal_output(output));
255
256         if (fd != nfd) {
257                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
258                 close_nointr_nofail(fd);
259         } else
260                 r = nfd;
261
262         return r;
263 }
/* Opens the terminal at path via open_terminal() (with O_NOCTTY added to
 * mode) and installs it as file descriptor nfd.
 *
 * Returns nfd on success, a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already landed on the requested number, nothing to move */
        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(term_fd);

        return r;
}
281
282 static bool is_terminal_input(ExecInput i) {
283         return
284                 i == EXEC_INPUT_TTY ||
285                 i == EXEC_INPUT_TTY_FORCE ||
286                 i == EXEC_INPUT_TTY_FAIL;
287 }
288
289 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
290
291         if (is_terminal_input(std_input) && !apply_tty_stdin)
292                 return EXEC_INPUT_NULL;
293
294         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
295                 return EXEC_INPUT_NULL;
296
297         return std_input;
298 }
299
300 static int fixup_output(ExecOutput std_output, int socket_fd) {
301
302         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
303                 return EXEC_OUTPUT_INHERIT;
304
305         return std_output;
306 }
307
308 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
309         ExecInput i;
310
311         assert(context);
312
313         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
314
315         switch (i) {
316
317         case EXEC_INPUT_NULL:
318                 return open_null_as(O_RDONLY, STDIN_FILENO);
319
320         case EXEC_INPUT_TTY:
321         case EXEC_INPUT_TTY_FORCE:
322         case EXEC_INPUT_TTY_FAIL: {
323                 int fd, r;
324
325                 fd = acquire_terminal(tty_path(context),
326                                       i == EXEC_INPUT_TTY_FAIL,
327                                       i == EXEC_INPUT_TTY_FORCE,
328                                       false,
329                                       (usec_t) -1);
330                 if (fd < 0)
331                         return fd;
332
333                 if (fd != STDIN_FILENO) {
334                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
335                         close_nointr_nofail(fd);
336                 } else
337                         r = STDIN_FILENO;
338
339                 return r;
340         }
341
342         case EXEC_INPUT_SOCKET:
343                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
344
345         default:
346                 assert_not_reached("Unknown input type");
347         }
348 }
349
350 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
351         ExecOutput o;
352         ExecInput i;
353         int r;
354
355         assert(context);
356         assert(ident);
357
358         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
359         o = fixup_output(context->std_output, socket_fd);
360
361         if (fileno == STDERR_FILENO) {
362                 ExecOutput e;
363                 e = fixup_output(context->std_error, socket_fd);
364
365                 /* This expects the input and output are already set up */
366
367                 /* Don't change the stderr file descriptor if we inherit all
368                  * the way and are not on a tty */
369                 if (e == EXEC_OUTPUT_INHERIT &&
370                     o == EXEC_OUTPUT_INHERIT &&
371                     i == EXEC_INPUT_NULL &&
372                     !is_terminal_input(context->std_input) &&
373                     getppid () != 1)
374                         return fileno;
375
376                 /* Duplicate from stdout if possible */
377                 if (e == o || e == EXEC_OUTPUT_INHERIT)
378                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
379
380                 o = e;
381
382         } else if (o == EXEC_OUTPUT_INHERIT) {
383                 /* If input got downgraded, inherit the original value */
384                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
385                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
386
387                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
388                 if (i != EXEC_INPUT_NULL)
389                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
390
391                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
392                 if (getppid() != 1)
393                         return fileno;
394
395                 /* We need to open /dev/null here anew, to get the right access mode. */
396                 return open_null_as(O_WRONLY, fileno);
397         }
398
399         switch (o) {
400
401         case EXEC_OUTPUT_NULL:
402                 return open_null_as(O_WRONLY, fileno);
403
404         case EXEC_OUTPUT_TTY:
405                 if (is_terminal_input(i))
406                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
407
408                 /* We don't reset the terminal if this is just about output */
409                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_SYSLOG:
412         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
413         case EXEC_OUTPUT_KMSG:
414         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
415         case EXEC_OUTPUT_JOURNAL:
416         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
417                 r = connect_logger_as(context, o, ident, unit_id, fileno);
418                 if (r < 0) {
419                         log_struct_unit(LOG_CRIT, unit_id,
420                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
421                                 fileno == STDOUT_FILENO ? "out" : "err",
422                                 unit_id, strerror(-r),
423                                 "ERRNO=%d", -r,
424                                 NULL);
425                         r = open_null_as(O_WRONLY, fileno);
426                 }
427                 return r;
428
429         case EXEC_OUTPUT_SOCKET:
430                 assert(socket_fd >= 0);
431                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
432
433         default:
434                 assert_not_reached("Unknown error type");
435         }
436 }
437
438 static int chown_terminal(int fd, uid_t uid) {
439         struct stat st;
440
441         assert(fd >= 0);
442
443         /* This might fail. What matters are the results. */
444         (void) fchown(fd, uid, -1);
445         (void) fchmod(fd, TTY_MODE);
446
447         if (fstat(fd, &st) < 0)
448                 return -errno;
449
450         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
451                 return -EPERM;
452
453         return 0;
454 }
455
456 static int setup_confirm_stdio(int *_saved_stdin,
457                                int *_saved_stdout) {
458         int fd = -1, saved_stdin, saved_stdout = -1, r;
459
460         assert(_saved_stdin);
461         assert(_saved_stdout);
462
463         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
464         if (saved_stdin < 0)
465                 return -errno;
466
467         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
468         if (saved_stdout < 0) {
469                 r = errno;
470                 goto fail;
471         }
472
473         fd = acquire_terminal(
474                         "/dev/console",
475                         false,
476                         false,
477                         false,
478                         DEFAULT_CONFIRM_USEC);
479         if (fd < 0) {
480                 r = fd;
481                 goto fail;
482         }
483
484         r = chown_terminal(fd, getuid());
485         if (r < 0)
486                 goto fail;
487
488         if (dup2(fd, STDIN_FILENO) < 0) {
489                 r = -errno;
490                 goto fail;
491         }
492
493         if (dup2(fd, STDOUT_FILENO) < 0) {
494                 r = -errno;
495                 goto fail;
496         }
497
498         if (fd >= 2)
499                 close_nointr_nofail(fd);
500
501         *_saved_stdin = saved_stdin;
502         *_saved_stdout = saved_stdout;
503
504         return 0;
505
506 fail:
507         if (saved_stdout >= 0)
508                 close_nointr_nofail(saved_stdout);
509
510         if (saved_stdin >= 0)
511                 close_nointr_nofail(saved_stdin);
512
513         if (fd >= 0)
514                 close_nointr_nofail(fd);
515
516         return r;
517 }
518
519 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
520         int fd;
521         va_list ap;
522
523         assert(format);
524
525         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
526         if (fd < 0)
527                 return fd;
528
529         va_start(ap, format);
530         vdprintf(fd, format, ap);
531         va_end(ap);
532
533         close_nointr_nofail(fd);
534
535         return 0;
536 }
537
/* Releases the terminal, puts the saved descriptors back onto stdin and
 * stdout, and closes the saved copies. Descriptors that are negative are
 * skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. The saved descriptors are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        bool have_stdin, have_stdout;
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* Restore both first, then drop the saved copies */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return r;
}
564
/* Switches stdin/stdout to the console, asks whether the command line in
 * argv shall be executed and stores the answer character in *response.
 * stdin/stdout are restored before returning, on every path that got as
 * far as switching them.
 *
 * Returns the result of ask(), or a negative error value if the console
 * could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (line) {
                r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
                free(line);
        } else
                /* Still fall through to the restore below, so that the
                 * saved descriptors are not leaked and stdio does not
                 * stay on the console. */
                r = -ENOMEM;

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
584
585 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
586         bool keep_groups = false;
587         int r;
588
589         assert(context);
590
591         /* Lookup and set GID and supplementary group list. Here too
592          * we avoid NSS lookups for gid=0. */
593
594         if (context->group || username) {
595
596                 if (context->group) {
597                         const char *g = context->group;
598
599                         if ((r = get_group_creds(&g, &gid)) < 0)
600                                 return r;
601                 }
602
603                 /* First step, initialize groups from /etc/groups */
604                 if (username && gid != 0) {
605                         if (initgroups(username, gid) < 0)
606                                 return -errno;
607
608                         keep_groups = true;
609                 }
610
611                 /* Second step, set our gids */
612                 if (setresgid(gid, gid, gid) < 0)
613                         return -errno;
614         }
615
616         if (context->supplementary_groups) {
617                 int ngroups_max, k;
618                 gid_t *gids;
619                 char **i;
620
621                 /* Final step, initialize any manually set supplementary groups */
622                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
623
624                 if (!(gids = new(gid_t, ngroups_max)))
625                         return -ENOMEM;
626
627                 if (keep_groups) {
628                         if ((k = getgroups(ngroups_max, gids)) < 0) {
629                                 free(gids);
630                                 return -errno;
631                         }
632                 } else
633                         k = 0;
634
635                 STRV_FOREACH(i, context->supplementary_groups) {
636                         const char *g;
637
638                         if (k >= ngroups_max) {
639                                 free(gids);
640                                 return -E2BIG;
641                         }
642
643                         g = *i;
644                         r = get_group_creds(&g, gids+k);
645                         if (r < 0) {
646                                 free(gids);
647                                 return r;
648                         }
649
650                         k++;
651                 }
652
653                 if (setgroups(k, gids) < 0) {
654                         free(gids);
655                         return -errno;
656                 }
657
658                 free(gids);
659         }
660
661         return 0;
662 }
663
664 static int enforce_user(const ExecContext *context, uid_t uid) {
665         assert(context);
666
667         /* Sets (but doesn't lookup) the uid and make sure we keep the
668          * capabilities while doing so. */
669
670         if (context->capabilities) {
671                 _cleanup_cap_free_ cap_t d = NULL;
672                 static const cap_value_t bits[] = {
673                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
674                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
675                 };
676
677                 /* First step: If we need to keep capabilities but
678                  * drop privileges we need to make sure we keep our
679                  * caps, while we drop privileges. */
680                 if (uid != 0) {
681                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
682
683                         if (prctl(PR_GET_SECUREBITS) != sb)
684                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685                                         return -errno;
686                 }
687
688                 /* Second step: set the capabilities. This will reduce
689                  * the capabilities to the minimum we need. */
690
691                 d = cap_dup(context->capabilities);
692                 if (!d)
693                         return -errno;
694
695                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
696                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
697                         return -errno;
698
699                 if (cap_set_proc(d) < 0)
700                         return -errno;
701         }
702
703         /* Third step: actually set the uids */
704         if (setresuid(uid, uid, uid) < 0)
705                 return -errno;
706
707         /* At this point we should have all necessary capabilities but
708            are otherwise a normal user. However, the caps might got
709            corrupted due to the setresuid() so we need clean them up
710            later. This is done outside of this call. */
711
712         return 0;
713 }
714
715 #ifdef HAVE_PAM
716
717 static int null_conv(
718                 int num_msg,
719                 const struct pam_message **msg,
720                 struct pam_response **resp,
721                 void *appdata_ptr) {
722
723         /* We don't support conversations */
724
725         return PAM_CONV_ERR;
726 }
727
728 static int setup_pam(
729                 const char *name,
730                 const char *user,
731                 uid_t uid,
732                 const char *tty,
733                 char ***pam_env,
734                 int fds[], unsigned n_fds) {
735
736         static const struct pam_conv conv = {
737                 .conv = null_conv,
738                 .appdata_ptr = NULL
739         };
740
741         pam_handle_t *handle = NULL;
742         sigset_t ss, old_ss;
743         int pam_code = PAM_SUCCESS;
744         int err;
745         char **e = NULL;
746         bool close_session = false;
747         pid_t pam_pid = 0, parent_pid;
748         int flags = 0;
749
750         assert(name);
751         assert(user);
752         assert(pam_env);
753
754         /* We set up PAM in the parent process, then fork. The child
755          * will then stay around until killed via PR_GET_PDEATHSIG or
756          * systemd via the cgroup logic. It will then remove the PAM
757          * session again. The parent process will exec() the actual
758          * daemon. We do things this way to ensure that the main PID
759          * of the daemon is the one we initially fork()ed. */
760
761         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
762                 flags |= PAM_SILENT;
763
764         pam_code = pam_start(name, user, &conv, &handle);
765         if (pam_code != PAM_SUCCESS) {
766                 handle = NULL;
767                 goto fail;
768         }
769
770         if (tty) {
771                 pam_code = pam_set_item(handle, PAM_TTY, tty);
772                 if (pam_code != PAM_SUCCESS)
773                         goto fail;
774         }
775
776         pam_code = pam_acct_mgmt(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         pam_code = pam_open_session(handle, flags);
781         if (pam_code != PAM_SUCCESS)
782                 goto fail;
783
784         close_session = true;
785
786         e = pam_getenvlist(handle);
787         if (!e) {
788                 pam_code = PAM_BUF_ERR;
789                 goto fail;
790         }
791
792         /* Block SIGTERM, so that we know that it won't get lost in
793          * the child */
794         if (sigemptyset(&ss) < 0 ||
795             sigaddset(&ss, SIGTERM) < 0 ||
796             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
797                 goto fail;
798
799         parent_pid = getpid();
800
801         pam_pid = fork();
802         if (pam_pid < 0)
803                 goto fail;
804
805         if (pam_pid == 0) {
806                 int sig;
807                 int r = EXIT_PAM;
808
809                 /* The child's job is to reset the PAM session on
810                  * termination */
811
812                 /* This string must fit in 10 chars (i.e. the length
813                  * of "/sbin/init"), to look pretty in /bin/ps */
814                 rename_process("(sd-pam)");
815
816                 /* Make sure we don't keep open the passed fds in this
817                 child. We assume that otherwise only those fds are
818                 open here that have been opened by PAM. */
819                 close_many(fds, n_fds);
820
821                 /* Drop privileges - we don't need any to pam_close_session
822                  * and this will make PR_SET_PDEATHSIG work in most cases.
823                  * If this fails, ignore the error - but expect sd-pam threads
824                  * to fail to exit normally */
825                 if (setresuid(uid, uid, uid) < 0)
826                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
827
828                 /* Wait until our parent died. This will only work if
829                  * the above setresuid() succeeds, otherwise the kernel
830                  * will not allow unprivileged parents kill their privileged
831                  * children this way. We rely on the control groups kill logic
832                  * to do the rest for us. */
833                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
834                         goto child_finish;
835
836                 /* Check if our parent process might already have
837                  * died? */
838                 if (getppid() == parent_pid) {
839                         for (;;) {
840                                 if (sigwait(&ss, &sig) < 0) {
841                                         if (errno == EINTR)
842                                                 continue;
843
844                                         goto child_finish;
845                                 }
846
847                                 assert(sig == SIGTERM);
848                                 break;
849                         }
850                 }
851
852                 /* If our parent died we'll end the session */
853                 if (getppid() != parent_pid) {
854                         pam_code = pam_close_session(handle, flags);
855                         if (pam_code != PAM_SUCCESS)
856                                 goto child_finish;
857                 }
858
859                 r = 0;
860
861         child_finish:
862                 pam_end(handle, pam_code | flags);
863                 _exit(r);
864         }
865
866         /* If the child was forked off successfully it will do all the
867          * cleanups, so forget about the handle here. */
868         handle = NULL;
869
870         /* Unblock SIGTERM again in the parent */
871         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
872                 goto fail;
873
874         /* We close the log explicitly here, since the PAM modules
875          * might have opened it, but we don't want this fd around. */
876         closelog();
877
878         *pam_env = e;
879         e = NULL;
880
881         return 0;
882
883 fail:
884         if (pam_code != PAM_SUCCESS) {
885                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
886                 err = -EPERM;  /* PAM errors do not map to errno */
887         } else {
888                 log_error("PAM failed: %m");
889                 err = -errno;
890         }
891
892         if (handle) {
893                 if (close_session)
894                         pam_code = pam_close_session(handle, flags);
895
896                 pam_end(handle, pam_code | flags);
897         }
898
899         strv_free(e);
900
901         closelog();
902
903         if (pam_pid > 1) {
904                 kill(pam_pid, SIGTERM);
905                 kill(pam_pid, SIGCONT);
906         }
907
908         return err;
909 }
910 #endif
911
/* Renames the current process to "(NAME)", where NAME is made of at most
 * the last 8 characters of the base name of path. If the base name is
 * empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result must fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps. */
        char process_name[11];
        const char *name;
        char *out;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually more telling than the
                 * head, which might just be "systemd-". */
                name += len - 8;
                len = 8;
        }

        out = process_name;
        *out++ = '(';
        memcpy(out, name, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
942
943 #ifdef HAVE_SECCOMP
944
945 static int apply_seccomp(ExecContext *c) {
946         uint32_t negative_action, action;
947         scmp_filter_ctx *seccomp;
948         Iterator i;
949         void *id;
950         int r;
951
952         assert(c);
953
954         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
955
956         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
957         if (!seccomp)
958                 return -ENOMEM;
959
960         SET_FOREACH(id, c->syscall_archs, i) {
961                 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
962                 if (r == -EEXIST)
963                         continue;
964                 if (r < 0) {
965                         seccomp_release(seccomp);
966                         return r;
967                 }
968         }
969
970         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
971         SET_FOREACH(id, c->syscall_filter, i) {
972                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
973                 if (r < 0) {
974                         seccomp_release(seccomp);
975                         return r;
976                 }
977         }
978
979         r = seccomp_load(seccomp);
980         seccomp_release(seccomp);
981
982         return r;
983 }
984 #endif
985
986 static void do_idle_pipe_dance(int idle_pipe[4]) {
987         assert(idle_pipe);
988
989         if (idle_pipe[1] >= 0)
990                 close_nointr_nofail(idle_pipe[1]);
991         if (idle_pipe[2] >= 0)
992                 close_nointr_nofail(idle_pipe[2]);
993
994         if (idle_pipe[0] >= 0) {
995                 int r;
996
997                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
998
999                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1000                         /* Signal systemd that we are bored and want to continue. */
1001                         write(idle_pipe[3], "x", 1);
1002
1003                         /* Wait for systemd to react to the signal above. */
1004                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1005                 }
1006
1007                 close_nointr_nofail(idle_pipe[0]);
1008
1009         }
1010
1011         if (idle_pipe[3] >= 0)
1012                 close_nointr_nofail(idle_pipe[3]);
1013 }
1014
1015 static int build_environment(
1016                 ExecContext *c,
1017                 unsigned n_fds,
1018                 usec_t watchdog_usec,
1019                 const char *home,
1020                 const char *username,
1021                 const char *shell,
1022                 char ***ret) {
1023
1024         _cleanup_strv_free_ char **our_env = NULL;
1025         unsigned n_env = 0;
1026         char *x;
1027
1028         assert(c);
1029         assert(ret);
1030
1031         our_env = new0(char*, 10);
1032         if (!our_env)
1033                 return -ENOMEM;
1034
1035         if (n_fds > 0) {
1036                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1037                         return -ENOMEM;
1038                 our_env[n_env++] = x;
1039
1040                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1041                         return -ENOMEM;
1042                 our_env[n_env++] = x;
1043         }
1044
1045         if (watchdog_usec > 0) {
1046                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1047                         return -ENOMEM;
1048                 our_env[n_env++] = x;
1049
1050                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1051                         return -ENOMEM;
1052                 our_env[n_env++] = x;
1053         }
1054
1055         if (home) {
1056                 x = strappend("HOME=", home);
1057                 if (!x)
1058                         return -ENOMEM;
1059                 our_env[n_env++] = x;
1060         }
1061
1062         if (username) {
1063                 x = strappend("LOGNAME=", username);
1064                 if (!x)
1065                         return -ENOMEM;
1066                 our_env[n_env++] = x;
1067
1068                 x = strappend("USER=", username);
1069                 if (!x)
1070                         return -ENOMEM;
1071                 our_env[n_env++] = x;
1072         }
1073
1074         if (shell) {
1075                 x = strappend("SHELL=", shell);
1076                 if (!x)
1077                         return -ENOMEM;
1078                 our_env[n_env++] = x;
1079         }
1080
1081         if (is_terminal_input(c->std_input) ||
1082             c->std_output == EXEC_OUTPUT_TTY ||
1083             c->std_error == EXEC_OUTPUT_TTY ||
1084             c->tty_path) {
1085
1086                 x = strdup(default_term_for_tty(tty_path(c)));
1087                 if (!x)
1088                         return -ENOMEM;
1089                 our_env[n_env++] = x;
1090         }
1091
1092         our_env[n_env++] = NULL;
1093         assert(n_env <= 10);
1094
1095         *ret = our_env;
1096         our_env = NULL;
1097
1098         return 0;
1099 }
1100
1101 int exec_spawn(ExecCommand *command,
1102                char **argv,
1103                ExecContext *context,
1104                int fds[], unsigned n_fds,
1105                char **environment,
1106                bool apply_permissions,
1107                bool apply_chroot,
1108                bool apply_tty_stdin,
1109                bool confirm_spawn,
1110                CGroupControllerMask cgroup_supported,
1111                const char *cgroup_path,
1112                const char *unit_id,
1113                usec_t watchdog_usec,
1114                int idle_pipe[4],
1115                ExecRuntime *runtime,
1116                pid_t *ret) {
1117
1118         _cleanup_strv_free_ char **files_env = NULL;
1119         int socket_fd;
1120         char *line;
1121         pid_t pid;
1122         int r;
1123
1124         assert(command);
1125         assert(context);
1126         assert(ret);
1127         assert(fds || n_fds <= 0);
1128
1129         if (context->std_input == EXEC_INPUT_SOCKET ||
1130             context->std_output == EXEC_OUTPUT_SOCKET ||
1131             context->std_error == EXEC_OUTPUT_SOCKET) {
1132
1133                 if (n_fds != 1)
1134                         return -EINVAL;
1135
1136                 socket_fd = fds[0];
1137
1138                 fds = NULL;
1139                 n_fds = 0;
1140         } else
1141                 socket_fd = -1;
1142
1143         r = exec_context_load_environment(context, &files_env);
1144         if (r < 0) {
1145                 log_struct_unit(LOG_ERR,
1146                            unit_id,
1147                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1148                            "ERRNO=%d", -r,
1149                            NULL);
1150                 return r;
1151         }
1152
1153         if (!argv)
1154                 argv = command->argv;
1155
1156         line = exec_command_line(argv);
1157         if (!line)
1158                 return log_oom();
1159
1160         log_struct_unit(LOG_DEBUG,
1161                         unit_id,
1162                         "EXECUTABLE=%s", command->path,
1163                         "MESSAGE=About to execute: %s", line,
1164                         NULL);
1165         free(line);
1166
1167         pid = fork();
1168         if (pid < 0)
1169                 return -errno;
1170
1171         if (pid == 0) {
1172                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1173                 const char *username = NULL, *home = NULL, *shell = NULL;
1174                 unsigned n_dont_close = 0;
1175                 int dont_close[n_fds + 3];
1176                 uid_t uid = (uid_t) -1;
1177                 gid_t gid = (gid_t) -1;
1178                 sigset_t ss;
1179                 int i, err;
1180
1181                 /* child */
1182
1183                 rename_process_from_path(command->path);
1184
1185                 /* We reset exactly these signals, since they are the
1186                  * only ones we set to SIG_IGN in the main daemon. All
1187                  * others we leave untouched because we set them to
1188                  * SIG_DFL or a valid handler initially, both of which
1189                  * will be demoted to SIG_DFL. */
1190                 default_signals(SIGNALS_CRASH_HANDLER,
1191                                 SIGNALS_IGNORE, -1);
1192
1193                 if (context->ignore_sigpipe)
1194                         ignore_signals(SIGPIPE, -1);
1195
1196                 assert_se(sigemptyset(&ss) == 0);
1197                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1198                         err = -errno;
1199                         r = EXIT_SIGNAL_MASK;
1200                         goto fail_child;
1201                 }
1202
1203                 if (idle_pipe)
1204                         do_idle_pipe_dance(idle_pipe);
1205
1206                 /* Close sockets very early to make sure we don't
1207                  * block init reexecution because it cannot bind its
1208                  * sockets */
1209                 log_forget_fds();
1210
1211                 if (socket_fd >= 0)
1212                         dont_close[n_dont_close++] = socket_fd;
1213                 if (n_fds > 0) {
1214                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1215                         n_dont_close += n_fds;
1216                 }
1217                 if (runtime) {
1218                         if (runtime->netns_storage_socket[0] >= 0)
1219                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1220                         if (runtime->netns_storage_socket[1] >= 0)
1221                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1222                 }
1223
1224                 err = close_all_fds(dont_close, n_dont_close);
1225                 if (err < 0) {
1226                         r = EXIT_FDS;
1227                         goto fail_child;
1228                 }
1229
1230                 if (!context->same_pgrp)
1231                         if (setsid() < 0) {
1232                                 err = -errno;
1233                                 r = EXIT_SETSID;
1234                                 goto fail_child;
1235                         }
1236
1237                 if (context->tcpwrap_name) {
1238                         if (socket_fd >= 0)
1239                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1240                                         err = -EACCES;
1241                                         r = EXIT_TCPWRAP;
1242                                         goto fail_child;
1243                                 }
1244
1245                         for (i = 0; i < (int) n_fds; i++) {
1246                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1247                                         err = -EACCES;
1248                                         r = EXIT_TCPWRAP;
1249                                         goto fail_child;
1250                                 }
1251                         }
1252                 }
1253
1254                 exec_context_tty_reset(context);
1255
1256                 if (confirm_spawn) {
1257                         char response;
1258
1259                         err = ask_for_confirmation(&response, argv);
1260                         if (err == -ETIMEDOUT)
1261                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1262                         else if (err < 0)
1263                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1264                         else if (response == 's') {
1265                                 write_confirm_message("Skipping execution.\n");
1266                                 err = -ECANCELED;
1267                                 r = EXIT_CONFIRM;
1268                                 goto fail_child;
1269                         } else if (response == 'n') {
1270                                 write_confirm_message("Failing execution.\n");
1271                                 err = r = 0;
1272                                 goto fail_child;
1273                         }
1274                 }
1275
1276                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1277                  * must sure to drop O_NONBLOCK */
1278                 if (socket_fd >= 0)
1279                         fd_nonblock(socket_fd, false);
1280
1281                 err = setup_input(context, socket_fd, apply_tty_stdin);
1282                 if (err < 0) {
1283                         r = EXIT_STDIN;
1284                         goto fail_child;
1285                 }
1286
1287                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1288                 if (err < 0) {
1289                         r = EXIT_STDOUT;
1290                         goto fail_child;
1291                 }
1292
1293                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1294                 if (err < 0) {
1295                         r = EXIT_STDERR;
1296                         goto fail_child;
1297                 }
1298
1299                 if (cgroup_path) {
1300                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1301                         if (err < 0) {
1302                                 r = EXIT_CGROUP;
1303                                 goto fail_child;
1304                         }
1305                 }
1306
1307                 if (context->oom_score_adjust_set) {
1308                         char t[16];
1309
1310                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1311                         char_array_0(t);
1312
1313                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1314                                 err = -errno;
1315                                 r = EXIT_OOM_ADJUST;
1316                                 goto fail_child;
1317                         }
1318                 }
1319
1320                 if (context->nice_set)
1321                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1322                                 err = -errno;
1323                                 r = EXIT_NICE;
1324                                 goto fail_child;
1325                         }
1326
1327                 if (context->cpu_sched_set) {
1328                         struct sched_param param = {
1329                                 .sched_priority = context->cpu_sched_priority,
1330                         };
1331
1332                         r = sched_setscheduler(0,
1333                                                context->cpu_sched_policy |
1334                                                (context->cpu_sched_reset_on_fork ?
1335                                                 SCHED_RESET_ON_FORK : 0),
1336                                                &param);
1337                         if (r < 0) {
1338                                 err = -errno;
1339                                 r = EXIT_SETSCHEDULER;
1340                                 goto fail_child;
1341                         }
1342                 }
1343
1344                 if (context->cpuset)
1345                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1346                                 err = -errno;
1347                                 r = EXIT_CPUAFFINITY;
1348                                 goto fail_child;
1349                         }
1350
1351                 if (context->ioprio_set)
1352                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1353                                 err = -errno;
1354                                 r = EXIT_IOPRIO;
1355                                 goto fail_child;
1356                         }
1357
1358                 if (context->timer_slack_nsec != (nsec_t) -1)
1359                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1360                                 err = -errno;
1361                                 r = EXIT_TIMERSLACK;
1362                                 goto fail_child;
1363                         }
1364
1365                 if (context->utmp_id)
1366                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1367
1368                 if (context->user) {
1369                         username = context->user;
1370                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1371                         if (err < 0) {
1372                                 r = EXIT_USER;
1373                                 goto fail_child;
1374                         }
1375
1376                         if (is_terminal_input(context->std_input)) {
1377                                 err = chown_terminal(STDIN_FILENO, uid);
1378                                 if (err < 0) {
1379                                         r = EXIT_STDIN;
1380                                         goto fail_child;
1381                                 }
1382                         }
1383                 }
1384
1385 #ifdef HAVE_PAM
1386                 if (cgroup_path && context->user && context->pam_name) {
1387                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1388                         if (err < 0) {
1389                                 r = EXIT_CGROUP;
1390                                 goto fail_child;
1391                         }
1392
1393
1394                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1395                         if (err < 0) {
1396                                 r = EXIT_CGROUP;
1397                                 goto fail_child;
1398                         }
1399                 }
1400 #endif
1401
1402                 if (apply_permissions) {
1403                         err = enforce_groups(context, username, gid);
1404                         if (err < 0) {
1405                                 r = EXIT_GROUP;
1406                                 goto fail_child;
1407                         }
1408                 }
1409
1410                 umask(context->umask);
1411
1412 #ifdef HAVE_PAM
1413                 if (apply_permissions && context->pam_name && username) {
1414                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1415                         if (err < 0) {
1416                                 r = EXIT_PAM;
1417                                 goto fail_child;
1418                         }
1419                 }
1420 #endif
1421                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1422                         err = setup_netns(runtime->netns_storage_socket);
1423                         if (err < 0) {
1424                                 r = EXIT_NETWORK;
1425                                 goto fail_child;
1426                         }
1427                 }
1428
1429                 if (!strv_isempty(context->read_write_dirs) ||
1430                     !strv_isempty(context->read_only_dirs) ||
1431                     !strv_isempty(context->inaccessible_dirs) ||
1432                     context->mount_flags != 0 ||
1433                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1434                     context->private_devices) {
1435
1436                         char *tmp = NULL, *var = NULL;
1437
1438                         /* The runtime struct only contains the parent
1439                          * of the private /tmp, which is
1440                          * non-accessible to world users. Inside of it
1441                          * there's a /tmp that is sticky, and that's
1442                          * the one we want to use here. */
1443
1444                         if (context->private_tmp && runtime) {
1445                                 if (runtime->tmp_dir)
1446                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1447                                 if (runtime->var_tmp_dir)
1448                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1449                         }
1450
1451                         err = setup_namespace(
1452                                         context->read_write_dirs,
1453                                         context->read_only_dirs,
1454                                         context->inaccessible_dirs,
1455                                         tmp,
1456                                         var,
1457                                         context->private_devices,
1458                                         context->mount_flags);
1459
1460                         if (err < 0) {
1461                                 r = EXIT_NAMESPACE;
1462                                 goto fail_child;
1463                         }
1464                 }
1465
1466                 if (apply_chroot) {
1467                         if (context->root_directory)
1468                                 if (chroot(context->root_directory) < 0) {
1469                                         err = -errno;
1470                                         r = EXIT_CHROOT;
1471                                         goto fail_child;
1472                                 }
1473
1474                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1475                                 err = -errno;
1476                                 r = EXIT_CHDIR;
1477                                 goto fail_child;
1478                         }
1479                 } else {
1480                         _cleanup_free_ char *d = NULL;
1481
1482                         if (asprintf(&d, "%s/%s",
1483                                      context->root_directory ? context->root_directory : "",
1484                                      context->working_directory ? context->working_directory : "") < 0) {
1485                                 err = -ENOMEM;
1486                                 r = EXIT_MEMORY;
1487                                 goto fail_child;
1488                         }
1489
1490                         if (chdir(d) < 0) {
1491                                 err = -errno;
1492                                 r = EXIT_CHDIR;
1493                                 goto fail_child;
1494                         }
1495                 }
1496
1497                 /* We repeat the fd closing here, to make sure that
1498                  * nothing is leaked from the PAM modules */
1499                 err = close_all_fds(fds, n_fds);
1500                 if (err >= 0)
1501                         err = shift_fds(fds, n_fds);
1502                 if (err >= 0)
1503                         err = flags_fds(fds, n_fds, context->non_blocking);
1504                 if (err < 0) {
1505                         r = EXIT_FDS;
1506                         goto fail_child;
1507                 }
1508
1509                 if (apply_permissions) {
1510
1511                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1512                                 if (!context->rlimit[i])
1513                                         continue;
1514
1515                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1516                                         err = -errno;
1517                                         r = EXIT_LIMITS;
1518                                         goto fail_child;
1519                                 }
1520                         }
1521
1522                         if (context->capability_bounding_set_drop) {
1523                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1524                                 if (err < 0) {
1525                                         r = EXIT_CAPABILITIES;
1526                                         goto fail_child;
1527                                 }
1528                         }
1529
1530                         if (context->user) {
1531                                 err = enforce_user(context, uid);
1532                                 if (err < 0) {
1533                                         r = EXIT_USER;
1534                                         goto fail_child;
1535                                 }
1536                         }
1537
1538                         /* PR_GET_SECUREBITS is not privileged, while
1539                          * PR_SET_SECUREBITS is. So to suppress
1540                          * potential EPERMs we'll try not to call
1541                          * PR_SET_SECUREBITS unless necessary. */
1542                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1543                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1544                                         err = -errno;
1545                                         r = EXIT_SECUREBITS;
1546                                         goto fail_child;
1547                                 }
1548
1549                         if (context->capabilities)
1550                                 if (cap_set_proc(context->capabilities) < 0) {
1551                                         err = -errno;
1552                                         r = EXIT_CAPABILITIES;
1553                                         goto fail_child;
1554                                 }
1555
1556                         if (context->no_new_privileges)
1557                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1558                                         err = -errno;
1559                                         r = EXIT_NO_NEW_PRIVILEGES;
1560                                         goto fail_child;
1561                                 }
1562
1563 #ifdef HAVE_SECCOMP
1564                         if (context->syscall_filter || context->syscall_archs) {
1565                                 err = apply_seccomp(context);
1566                                 if (err < 0) {
1567                                         r = EXIT_SECCOMP;
1568                                         goto fail_child;
1569                                 }
1570                         }
1571 #endif
1572
1573 #ifdef HAVE_SELINUX
1574                         if (context->selinux_context && use_selinux()) {
1575                                 bool ignore;
1576                                 char* c;
1577
1578                                 c = context->selinux_context;
1579                                 if (c[0] == '-') {
1580                                         c++;
1581                                         ignore = true;
1582                                 } else
1583                                         ignore = false;
1584
1585                                 err = setexeccon(c);
1586                                 if (err < 0 && !ignore) {
1587                                         r = EXIT_SELINUX_CONTEXT;
1588                                         goto fail_child;
1589                                 }
1590                         }
1591 #endif
1592                 }
1593
1594                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1595                 if (r < 0) {
1596                         r = EXIT_MEMORY;
1597                         goto fail_child;
1598                 }
1599
1600                 final_env = strv_env_merge(5,
1601                                            environment,
1602                                            our_env,
1603                                            context->environment,
1604                                            files_env,
1605                                            pam_env,
1606                                            NULL);
1607                 if (!final_env) {
1608                         err = -ENOMEM;
1609                         r = EXIT_MEMORY;
1610                         goto fail_child;
1611                 }
1612
1613                 final_argv = replace_env_argv(argv, final_env);
1614                 if (!final_argv) {
1615                         err = -ENOMEM;
1616                         r = EXIT_MEMORY;
1617                         goto fail_child;
1618                 }
1619
1620                 final_env = strv_env_clean(final_env);
1621
1622                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1623                         line = exec_command_line(final_argv);
1624                         if (line) {
1625                                 log_open();
1626                                 log_struct_unit(LOG_DEBUG,
1627                                                 unit_id,
1628                                                 "EXECUTABLE=%s", command->path,
1629                                                 "MESSAGE=Executing: %s", line,
1630                                                 NULL);
1631                                 log_close();
1632                                 free(line);
1633                                 line = NULL;
1634                         }
1635                 }
1636                 execve(command->path, final_argv, final_env);
1637                 err = -errno;
1638                 r = EXIT_EXEC;
1639
1640         fail_child:
1641                 if (r != 0) {
1642                         log_open();
1643                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1644                                    "EXECUTABLE=%s", command->path,
1645                                    "MESSAGE=Failed at step %s spawning %s: %s",
1646                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1647                                           command->path, strerror(-err),
1648                                    "ERRNO=%d", -err,
1649                                    NULL);
1650                         log_close();
1651                 }
1652
1653                 _exit(r);
1654         }
1655
1656         log_struct_unit(LOG_DEBUG,
1657                         unit_id,
1658                         "MESSAGE=Forked %s as "PID_FMT,
1659                         command->path, pid,
1660                         NULL);
1661
1662         /* We add the new process to the cgroup both in the child (so
1663          * that we can be sure that no user code is ever executed
1664          * outside of the cgroup) and in the parent (so that we can be
1665          * sure that when we kill the cgroup the process will be
1666          * killed too). */
1667         if (cgroup_path)
1668                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1669
1670         exec_status_start(&command->exec_status, pid);
1671
1672         *ret = pid;
1673         return 0;
1674 }
1675
1676 void exec_context_init(ExecContext *c) {
1677         assert(c);
1678
1679         c->umask = 0022;
1680         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1681         c->cpu_sched_policy = SCHED_OTHER;
1682         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1683         c->syslog_level_prefix = true;
1684         c->ignore_sigpipe = true;
1685         c->timer_slack_nsec = (nsec_t) -1;
1686 }
1687
1688 void exec_context_done(ExecContext *c) {
1689         unsigned l;
1690
1691         assert(c);
1692
1693         strv_free(c->environment);
1694         c->environment = NULL;
1695
1696         strv_free(c->environment_files);
1697         c->environment_files = NULL;
1698
1699         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1700                 free(c->rlimit[l]);
1701                 c->rlimit[l] = NULL;
1702         }
1703
1704         free(c->working_directory);
1705         c->working_directory = NULL;
1706         free(c->root_directory);
1707         c->root_directory = NULL;
1708
1709         free(c->tty_path);
1710         c->tty_path = NULL;
1711
1712         free(c->tcpwrap_name);
1713         c->tcpwrap_name = NULL;
1714
1715         free(c->syslog_identifier);
1716         c->syslog_identifier = NULL;
1717
1718         free(c->user);
1719         c->user = NULL;
1720
1721         free(c->group);
1722         c->group = NULL;
1723
1724         strv_free(c->supplementary_groups);
1725         c->supplementary_groups = NULL;
1726
1727         free(c->pam_name);
1728         c->pam_name = NULL;
1729
1730         if (c->capabilities) {
1731                 cap_free(c->capabilities);
1732                 c->capabilities = NULL;
1733         }
1734
1735         strv_free(c->read_only_dirs);
1736         c->read_only_dirs = NULL;
1737
1738         strv_free(c->read_write_dirs);
1739         c->read_write_dirs = NULL;
1740
1741         strv_free(c->inaccessible_dirs);
1742         c->inaccessible_dirs = NULL;
1743
1744         if (c->cpuset)
1745                 CPU_FREE(c->cpuset);
1746
1747         free(c->utmp_id);
1748         c->utmp_id = NULL;
1749
1750         free(c->selinux_context);
1751         c->selinux_context = NULL;
1752
1753 #ifdef HAVE_SECCOMP
1754         set_free(c->syscall_filter);
1755         c->syscall_filter = NULL;
1756
1757         set_free(c->syscall_archs);
1758         c->syscall_archs = NULL;
1759 #endif
1760 }
1761
1762 void exec_command_done(ExecCommand *c) {
1763         assert(c);
1764
1765         free(c->path);
1766         c->path = NULL;
1767
1768         strv_free(c->argv);
1769         c->argv = NULL;
1770 }
1771
1772 void exec_command_done_array(ExecCommand *c, unsigned n) {
1773         unsigned i;
1774
1775         for (i = 0; i < n; i++)
1776                 exec_command_done(c+i);
1777 }
1778
1779 void exec_command_free_list(ExecCommand *c) {
1780         ExecCommand *i;
1781
1782         while ((i = c)) {
1783                 LIST_REMOVE(command, c, i);
1784                 exec_command_done(i);
1785                 free(i);
1786         }
1787 }
1788
1789 void exec_command_free_array(ExecCommand **c, unsigned n) {
1790         unsigned i;
1791
1792         for (i = 0; i < n; i++) {
1793                 exec_command_free_list(c[i]);
1794                 c[i] = NULL;
1795         }
1796 }
1797
1798 int exec_context_load_environment(const ExecContext *c, char ***l) {
1799         char **i, **r = NULL;
1800
1801         assert(c);
1802         assert(l);
1803
1804         STRV_FOREACH(i, c->environment_files) {
1805                 char *fn;
1806                 int k;
1807                 bool ignore = false;
1808                 char **p;
1809                 _cleanup_globfree_ glob_t pglob = {};
1810                 int count, n;
1811
1812                 fn = *i;
1813
1814                 if (fn[0] == '-') {
1815                         ignore = true;
1816                         fn ++;
1817                 }
1818
1819                 if (!path_is_absolute(fn)) {
1820                         if (ignore)
1821                                 continue;
1822
1823                         strv_free(r);
1824                         return -EINVAL;
1825                 }
1826
1827                 /* Filename supports globbing, take all matching files */
1828                 errno = 0;
1829                 if (glob(fn, 0, NULL, &pglob) != 0) {
1830                         if (ignore)
1831                                 continue;
1832
1833                         strv_free(r);
1834                         return errno ? -errno : -EINVAL;
1835                 }
1836                 count = pglob.gl_pathc;
1837                 if (count == 0) {
1838                         if (ignore)
1839                                 continue;
1840
1841                         strv_free(r);
1842                         return -EINVAL;
1843                 }
1844                 for (n = 0; n < count; n++) {
1845                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1846                         if (k < 0) {
1847                                 if (ignore)
1848                                         continue;
1849
1850                                 strv_free(r);
1851                                 return k;
1852                         }
1853                         /* Log invalid environment variables with filename */
1854                         if (p)
1855                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1856
1857                         if (r == NULL)
1858                                 r = p;
1859                         else {
1860                                 char **m;
1861
1862                                 m = strv_env_merge(2, r, p);
1863                                 strv_free(r);
1864                                 strv_free(p);
1865                                 if (!m)
1866                                         return -ENOMEM;
1867
1868                                 r = m;
1869                         }
1870                 }
1871         }
1872
1873         *l = r;
1874
1875         return 0;
1876 }
1877
/* Checks whether the given TTY (with or without a "/dev/" prefix)
 * might be the device behind /dev/console. Errs on the side of
 * returning true when this cannot be determined. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        /* Compare plain device names only */
        if (startswith(tty, "/dev/"))
                tty += 5;

        /* The console trivially matches itself */
        if (streq(tty, "console"))
                return true;

        /* If the console cannot be resolved, assume a match is possible */
        console = resolve_dev_console(&active);
        if (!console)
                return true;

        /* "tty0" stands for the active VC, hence any VC may match it */
        if (streq(console, tty))
                match = true;
        else
                match = streq(console, "tty0") && tty_is_vc(tty);

        /* NOTE(review): only "active" is freed, presumably "console"
         * points into that buffer -- confirm against
         * resolve_dev_console() */
        free(active);

        return match;
}
1900
1901 bool exec_context_may_touch_console(ExecContext *ec) {
1902         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1903                 is_terminal_input(ec->std_input) ||
1904                 is_terminal_output(ec->std_output) ||
1905                 is_terminal_output(ec->std_error)) &&
1906                tty_may_match_dev_console(tty_path(ec));
1907 }
1908
1909 static void strv_fprintf(FILE *f, char **l) {
1910         char **g;
1911
1912         assert(f);
1913
1914         STRV_FOREACH(g, l)
1915                 fprintf(f, " %s", *g);
1916 }
1917
1918 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1919         char **e;
1920         unsigned i;
1921
1922         assert(c);
1923         assert(f);
1924
1925         prefix = strempty(prefix);
1926
1927         fprintf(f,
1928                 "%sUMask: %04o\n"
1929                 "%sWorkingDirectory: %s\n"
1930                 "%sRootDirectory: %s\n"
1931                 "%sNonBlocking: %s\n"
1932                 "%sPrivateTmp: %s\n"
1933                 "%sPrivateNetwork: %s\n"
1934                 "%sPrivateDevices: %s\n"
1935                 "%sIgnoreSIGPIPE: %s\n",
1936                 prefix, c->umask,
1937                 prefix, c->working_directory ? c->working_directory : "/",
1938                 prefix, c->root_directory ? c->root_directory : "/",
1939                 prefix, yes_no(c->non_blocking),
1940                 prefix, yes_no(c->private_tmp),
1941                 prefix, yes_no(c->private_network),
1942                 prefix, yes_no(c->private_devices),
1943                 prefix, yes_no(c->ignore_sigpipe));
1944
1945         STRV_FOREACH(e, c->environment)
1946                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1947
1948         STRV_FOREACH(e, c->environment_files)
1949                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1950
1951         if (c->tcpwrap_name)
1952                 fprintf(f,
1953                         "%sTCPWrapName: %s\n",
1954                         prefix, c->tcpwrap_name);
1955
1956         if (c->nice_set)
1957                 fprintf(f,
1958                         "%sNice: %i\n",
1959                         prefix, c->nice);
1960
1961         if (c->oom_score_adjust_set)
1962                 fprintf(f,
1963                         "%sOOMScoreAdjust: %i\n",
1964                         prefix, c->oom_score_adjust);
1965
1966         for (i = 0; i < RLIM_NLIMITS; i++)
1967                 if (c->rlimit[i])
1968                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1969
1970         if (c->ioprio_set) {
1971                 char *class_str;
1972                 int r;
1973
1974                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1975                 if (r < 0)
1976                         class_str = NULL;
1977                 fprintf(f,
1978                         "%sIOSchedulingClass: %s\n"
1979                         "%sIOPriority: %i\n",
1980                         prefix, strna(class_str),
1981                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1982                 free(class_str);
1983         }
1984
1985         if (c->cpu_sched_set) {
1986                 char *policy_str;
1987                 int r;
1988
1989                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1990                 if (r < 0)
1991                         policy_str = NULL;
1992                 fprintf(f,
1993                         "%sCPUSchedulingPolicy: %s\n"
1994                         "%sCPUSchedulingPriority: %i\n"
1995                         "%sCPUSchedulingResetOnFork: %s\n",
1996                         prefix, strna(policy_str),
1997                         prefix, c->cpu_sched_priority,
1998                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1999                 free(policy_str);
2000         }
2001
2002         if (c->cpuset) {
2003                 fprintf(f, "%sCPUAffinity:", prefix);
2004                 for (i = 0; i < c->cpuset_ncpus; i++)
2005                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2006                                 fprintf(f, " %u", i);
2007                 fputs("\n", f);
2008         }
2009
2010         if (c->timer_slack_nsec != (nsec_t) -1)
2011                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2012
2013         fprintf(f,
2014                 "%sStandardInput: %s\n"
2015                 "%sStandardOutput: %s\n"
2016                 "%sStandardError: %s\n",
2017                 prefix, exec_input_to_string(c->std_input),
2018                 prefix, exec_output_to_string(c->std_output),
2019                 prefix, exec_output_to_string(c->std_error));
2020
2021         if (c->tty_path)
2022                 fprintf(f,
2023                         "%sTTYPath: %s\n"
2024                         "%sTTYReset: %s\n"
2025                         "%sTTYVHangup: %s\n"
2026                         "%sTTYVTDisallocate: %s\n",
2027                         prefix, c->tty_path,
2028                         prefix, yes_no(c->tty_reset),
2029                         prefix, yes_no(c->tty_vhangup),
2030                         prefix, yes_no(c->tty_vt_disallocate));
2031
2032         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2033             c->std_output == EXEC_OUTPUT_KMSG ||
2034             c->std_output == EXEC_OUTPUT_JOURNAL ||
2035             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2036             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2037             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2038             c->std_error == EXEC_OUTPUT_SYSLOG ||
2039             c->std_error == EXEC_OUTPUT_KMSG ||
2040             c->std_error == EXEC_OUTPUT_JOURNAL ||
2041             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2042             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2043             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2044
2045                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2046
2047                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2048                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2049
2050                 fprintf(f,
2051                         "%sSyslogFacility: %s\n"
2052                         "%sSyslogLevel: %s\n",
2053                         prefix, strna(fac_str),
2054                         prefix, strna(lvl_str));
2055         }
2056
2057         if (c->capabilities) {
2058                 _cleanup_cap_free_charp_ char *t;
2059
2060                 t = cap_to_text(c->capabilities, NULL);
2061                 if (t)
2062                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2063         }
2064
2065         if (c->secure_bits)
2066                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2067                         prefix,
2068                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2069                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2070                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2071                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2072                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2073                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2074
2075         if (c->capability_bounding_set_drop) {
2076                 unsigned long l;
2077                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2078
2079                 for (l = 0; l <= cap_last_cap(); l++)
2080                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2081                                 _cleanup_cap_free_charp_ char *t;
2082
2083                                 t = cap_to_name(l);
2084                                 if (t)
2085                                         fprintf(f, " %s", t);
2086                         }
2087
2088                 fputs("\n", f);
2089         }
2090
2091         if (c->user)
2092                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2093         if (c->group)
2094                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2095
2096         if (strv_length(c->supplementary_groups) > 0) {
2097                 fprintf(f, "%sSupplementaryGroups:", prefix);
2098                 strv_fprintf(f, c->supplementary_groups);
2099                 fputs("\n", f);
2100         }
2101
2102         if (c->pam_name)
2103                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2104
2105         if (strv_length(c->read_write_dirs) > 0) {
2106                 fprintf(f, "%sReadWriteDirs:", prefix);
2107                 strv_fprintf(f, c->read_write_dirs);
2108                 fputs("\n", f);
2109         }
2110
2111         if (strv_length(c->read_only_dirs) > 0) {
2112                 fprintf(f, "%sReadOnlyDirs:", prefix);
2113                 strv_fprintf(f, c->read_only_dirs);
2114                 fputs("\n", f);
2115         }
2116
2117         if (strv_length(c->inaccessible_dirs) > 0) {
2118                 fprintf(f, "%sInaccessibleDirs:", prefix);
2119                 strv_fprintf(f, c->inaccessible_dirs);
2120                 fputs("\n", f);
2121         }
2122
2123         if (c->utmp_id)
2124                 fprintf(f,
2125                         "%sUtmpIdentifier: %s\n",
2126                         prefix, c->utmp_id);
2127
2128         if (c->selinux_context)
2129                 fprintf(f,
2130                         "%sSELinuxContext: %s\n",
2131                         prefix, c->selinux_context);
2132
2133         if (c->syscall_filter) {
2134 #ifdef HAVE_SECCOMP
2135                 Iterator j;
2136                 void *id;
2137                 bool first = true;
2138 #endif
2139
2140                 fprintf(f,
2141                         "%sSystemCallFilter: ",
2142                         prefix);
2143
2144                 if (!c->syscall_whitelist)
2145                         fputc('~', f);
2146
2147 #ifdef HAVE_SECCOMP
2148                 SET_FOREACH(id, c->syscall_filter, j) {
2149                         _cleanup_free_ char *name = NULL;
2150
2151                         if (first)
2152                                 first = false;
2153                         else
2154                                 fputc(' ', f);
2155
2156                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2157                         fputs(strna(name), f);
2158                 }
2159 #endif
2160
2161                 fputc('\n', f);
2162         }
2163
2164         if (c->syscall_archs) {
2165 #ifdef HAVE_SECCOMP
2166                 Iterator j;
2167                 void *id;
2168 #endif
2169
2170                 fprintf(f,
2171                         "%sSystemCallArchitectures:",
2172                         prefix);
2173
2174 #ifdef HAVE_SECCOMP
2175                 SET_FOREACH(id, c->syscall_archs, j)
2176                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2177 #endif
2178                 fputc('\n', f);
2179         }
2180
2181         if (c->syscall_errno != 0)
2182                 fprintf(f,
2183                         "%sSystemCallErrorNumber: %s\n",
2184                         prefix, strna(errno_to_name(c->syscall_errno)));
2185 }
2186
2187 void exec_status_start(ExecStatus *s, pid_t pid) {
2188         assert(s);
2189
2190         zero(*s);
2191         s->pid = pid;
2192         dual_timestamp_get(&s->start_timestamp);
2193 }
2194
2195 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2196         assert(s);
2197
2198         if (s->pid && s->pid != pid)
2199                 zero(*s);
2200
2201         s->pid = pid;
2202         dual_timestamp_get(&s->exit_timestamp);
2203
2204         s->code = code;
2205         s->status = status;
2206
2207         if (context) {
2208                 if (context->utmp_id)
2209                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2210
2211                 exec_context_tty_reset(context);
2212         }
2213 }
2214
2215 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2216         char buf[FORMAT_TIMESTAMP_MAX];
2217
2218         assert(s);
2219         assert(f);
2220
2221         if (!prefix)
2222                 prefix = "";
2223
2224         if (s->pid <= 0)
2225                 return;
2226
2227         fprintf(f,
2228                 "%sPID: "PID_FMT"\n",
2229                 prefix, s->pid);
2230
2231         if (s->start_timestamp.realtime > 0)
2232                 fprintf(f,
2233                         "%sStart Timestamp: %s\n",
2234                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2235
2236         if (s->exit_timestamp.realtime > 0)
2237                 fprintf(f,
2238                         "%sExit Timestamp: %s\n"
2239                         "%sExit Code: %s\n"
2240                         "%sExit Status: %i\n",
2241                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2242                         prefix, sigchld_code_to_string(s->code),
2243                         prefix, s->status);
2244 }
2245
2246 char *exec_command_line(char **argv) {
2247         size_t k;
2248         char *n, *p, **a;
2249         bool first = true;
2250
2251         assert(argv);
2252
2253         k = 1;
2254         STRV_FOREACH(a, argv)
2255                 k += strlen(*a)+3;
2256
2257         if (!(n = new(char, k)))
2258                 return NULL;
2259
2260         p = n;
2261         STRV_FOREACH(a, argv) {
2262
2263                 if (!first)
2264                         *(p++) = ' ';
2265                 else
2266                         first = false;
2267
2268                 if (strpbrk(*a, WHITESPACE)) {
2269                         *(p++) = '\'';
2270                         p = stpcpy(p, *a);
2271                         *(p++) = '\'';
2272                 } else
2273                         p = stpcpy(p, *a);
2274
2275         }
2276
2277         *p = 0;
2278
2279         /* FIXME: this doesn't really handle arguments that have
2280          * spaces and ticks in them */
2281
2282         return n;
2283 }
2284
2285 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2286         char *p2;
2287         const char *prefix2;
2288
2289         char *cmd;
2290
2291         assert(c);
2292         assert(f);
2293
2294         if (!prefix)
2295                 prefix = "";
2296         p2 = strappend(prefix, "\t");
2297         prefix2 = p2 ? p2 : prefix;
2298
2299         cmd = exec_command_line(c->argv);
2300
2301         fprintf(f,
2302                 "%sCommand Line: %s\n",
2303                 prefix, cmd ? cmd : strerror(ENOMEM));
2304
2305         free(cmd);
2306
2307         exec_status_dump(&c->exec_status, f, prefix2);
2308
2309         free(p2);
2310 }
2311
2312 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2313         assert(f);
2314
2315         if (!prefix)
2316                 prefix = "";
2317
2318         LIST_FOREACH(command, c, c)
2319                 exec_command_dump(c, f, prefix);
2320 }
2321
2322 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2323         ExecCommand *end;
2324
2325         assert(l);
2326         assert(e);
2327
2328         if (*l) {
2329                 /* It's kind of important, that we keep the order here */
2330                 LIST_FIND_TAIL(command, *l, end);
2331                 LIST_INSERT_AFTER(command, *l, end, e);
2332         } else
2333               *l = e;
2334 }
2335
2336 int exec_command_set(ExecCommand *c, const char *path, ...) {
2337         va_list ap;
2338         char **l, *p;
2339
2340         assert(c);
2341         assert(path);
2342
2343         va_start(ap, path);
2344         l = strv_new_ap(path, ap);
2345         va_end(ap);
2346
2347         if (!l)
2348                 return -ENOMEM;
2349
2350         p = strdup(path);
2351         if (!p) {
2352                 strv_free(l);
2353                 return -ENOMEM;
2354         }
2355
2356         free(c->path);
2357         c->path = p;
2358
2359         strv_free(c->argv);
2360         c->argv = l;
2361
2362         return 0;
2363 }
2364
2365 static int exec_runtime_allocate(ExecRuntime **rt) {
2366
2367         if (*rt)
2368                 return 0;
2369
2370         *rt = new0(ExecRuntime, 1);
2371         if (!*rt)
2372                 return -ENOMEM;
2373
2374         (*rt)->n_ref = 1;
2375         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2376
2377         return 0;
2378 }
2379
2380 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2381         int r;
2382
2383         assert(rt);
2384         assert(c);
2385         assert(id);
2386
2387         if (*rt)
2388                 return 1;
2389
2390         if (!c->private_network && !c->private_tmp)
2391                 return 0;
2392
2393         r = exec_runtime_allocate(rt);
2394         if (r < 0)
2395                 return r;
2396
2397         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2398                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2399                         return -errno;
2400         }
2401
2402         if (c->private_tmp && !(*rt)->tmp_dir) {
2403                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2404                 if (r < 0)
2405                         return r;
2406         }
2407
2408         return 1;
2409 }
2410
2411 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2412         assert(r);
2413         assert(r->n_ref > 0);
2414
2415         r->n_ref++;
2416         return r;
2417 }
2418
2419 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2420
2421         if (!r)
2422                 return NULL;
2423
2424         assert(r->n_ref > 0);
2425
2426         r->n_ref--;
2427         if (r->n_ref <= 0) {
2428                 free(r->tmp_dir);
2429                 free(r->var_tmp_dir);
2430                 close_pipe(r->netns_storage_socket);
2431                 free(r);
2432         }
2433
2434         return NULL;
2435 }
2436
2437 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2438         assert(u);
2439         assert(f);
2440         assert(fds);
2441
2442         if (!rt)
2443                 return 0;
2444
2445         if (rt->tmp_dir)
2446                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2447
2448         if (rt->var_tmp_dir)
2449                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2450
2451         if (rt->netns_storage_socket[0] >= 0) {
2452                 int copy;
2453
2454                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2455                 if (copy < 0)
2456                         return copy;
2457
2458                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2459         }
2460
2461         if (rt->netns_storage_socket[1] >= 0) {
2462                 int copy;
2463
2464                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2465                 if (copy < 0)
2466                         return copy;
2467
2468                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2469         }
2470
2471         return 0;
2472 }
2473
2474 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2475         int r;
2476
2477         assert(rt);
2478         assert(key);
2479         assert(value);
2480
2481         if (streq(key, "tmp-dir")) {
2482                 char *copy;
2483
2484                 r = exec_runtime_allocate(rt);
2485                 if (r < 0)
2486                         return r;
2487
2488                 copy = strdup(value);
2489                 if (!copy)
2490                         return log_oom();
2491
2492                 free((*rt)->tmp_dir);
2493                 (*rt)->tmp_dir = copy;
2494
2495         } else if (streq(key, "var-tmp-dir")) {
2496                 char *copy;
2497
2498                 r = exec_runtime_allocate(rt);
2499                 if (r < 0)
2500                         return r;
2501
2502                 copy = strdup(value);
2503                 if (!copy)
2504                         return log_oom();
2505
2506                 free((*rt)->var_tmp_dir);
2507                 (*rt)->var_tmp_dir = copy;
2508
2509         } else if (streq(key, "netns-socket-0")) {
2510                 int fd;
2511
2512                 r = exec_runtime_allocate(rt);
2513                 if (r < 0)
2514                         return r;
2515
2516                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2517                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2518                 else {
2519                         if ((*rt)->netns_storage_socket[0] >= 0)
2520                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2521
2522                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2523                 }
2524         } else if (streq(key, "netns-socket-1")) {
2525                 int fd;
2526
2527                 r = exec_runtime_allocate(rt);
2528                 if (r < 0)
2529                         return r;
2530
2531                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2532                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2533                 else {
2534                         if ((*rt)->netns_storage_socket[1] >= 0)
2535                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2536
2537                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2538                 }
2539         } else
2540                 return 0;
2541
2542         return 1;
2543 }
2544
/* Job function that removes the directory whose path is passed in p
 * using rm_rf_dangerous(). It takes ownership of the path string and
 * frees it when done. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2551
2552 void exec_runtime_destroy(ExecRuntime *rt) {
2553         if (!rt)
2554                 return;
2555
2556         /* If there are multiple users of this, let's leave the stuff around */
2557         if (rt->n_ref > 1)
2558                 return;
2559
2560         if (rt->tmp_dir) {
2561                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2562                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2563                 rt->tmp_dir = NULL;
2564         }
2565
2566         if (rt->var_tmp_dir) {
2567                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2568                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2569                 rt->var_tmp_dir = NULL;
2570         }
2571
2572         close_pipe(rt->netns_storage_socket);
2573 }
2574
/* Names of the ExecInput values, indexed by the enum value. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably this generates the lookup functions for the
 * table above, such as the exec_input_to_string() called by
 * exec_context_dump() -- confirm against the macro definition */
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2584
/* Names of the ExecOutput values, indexed by the enum value. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably this generates the lookup functions for the
 * table above, such as the exec_output_to_string() called by
 * exec_context_dump() -- confirm against the macro definition */
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);