chiark / gitweb /
execute: also set SO_SNDBUF when spawning a service with stdout/stderr connected...
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #include "execute.h"
51 #include "strv.h"
52 #include "macro.h"
53 #include "capability.h"
54 #include "util.h"
55 #include "log.h"
56 #include "sd-messages.h"
57 #include "ioprio.h"
58 #include "securebits.h"
59 #include "namespace.h"
60 #include "tcpwrap.h"
61 #include "exit-status.h"
62 #include "missing.h"
63 #include "utmp-wtmp.h"
64 #include "def.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69 #include "unit.h"
70 #include "async.h"
71
72 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
73 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
74
75 /* This assumes there is a 'tty' group */
76 #define TTY_MODE 0620
77
78 #define SNDBUF_SIZE (8*1024*1024)
79
/* Renumbers the passed file descriptors so that fds[i] ends up being
 * descriptor i+3, for every index i. The array is updated in place:
 * each entry that had to be moved is replaced by its duplicate and the
 * old descriptor is closed.
 *
 * Returns 0 on success, or a negative errno if fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free descriptor
                         * that is >= want, which is not necessarily
                         * want itself. */
                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The target slot was still occupied. Note
                         * the first index where that happened so the
                         * next pass can start over from there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
123
/* Applies the requested O_NONBLOCK state to every descriptor in fds[]
 * and clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors
                 * are meant to be handed over to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
150
151 _pure_ static const char *tty_path(const ExecContext *context) {
152         assert(context);
153
154         if (context->tty_path)
155                 return context->tty_path;
156
157         return "/dev/console";
158 }
159
160 static void exec_context_tty_reset(const ExecContext *context) {
161         assert(context);
162
163         if (context->tty_vhangup)
164                 terminal_vhangup(tty_path(context));
165
166         if (context->tty_reset)
167                 reset_terminal(tty_path(context));
168
169         if (context->tty_vt_disallocate && context->tty_path)
170                 vt_disallocate(context->tty_path);
171 }
172
173 static bool is_terminal_output(ExecOutput o) {
174         return
175                 o == EXEC_OUTPUT_TTY ||
176                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
177                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
178                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
179 }
180
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted descriptor */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(null_fd);

        return result;
}
198
199 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
200         int fd, r;
201         union sockaddr_union sa = {
202                 .un.sun_family = AF_UNIX,
203                 .un.sun_path = "/run/systemd/journal/stdout",
204         };
205
206         assert(context);
207         assert(output < _EXEC_OUTPUT_MAX);
208         assert(ident);
209         assert(nfd >= 0);
210
211         fd = socket(AF_UNIX, SOCK_STREAM, 0);
212         if (fd < 0)
213                 return -errno;
214
215         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
216         if (r < 0) {
217                 close_nointr_nofail(fd);
218                 return -errno;
219         }
220
221         if (shutdown(fd, SHUT_RD) < 0) {
222                 close_nointr_nofail(fd);
223                 return -errno;
224         }
225
226         fd_inc_sndbuf(fd, SNDBUF_SIZE);
227
228         dprintf(fd,
229                 "%s\n"
230                 "%s\n"
231                 "%i\n"
232                 "%i\n"
233                 "%i\n"
234                 "%i\n"
235                 "%i\n",
236                 context->syslog_identifier ? context->syslog_identifier : ident,
237                 unit_id,
238                 context->syslog_priority,
239                 !!context->syslog_level_prefix,
240                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
241                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
242                 is_terminal_output(output));
243
244         if (fd != nfd) {
245                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
246                 close_nointr_nofail(fd);
247         } else
248                 r = nfd;
249
250         return r;
251 }
/* Opens the terminal at path via open_terminal() (mode plus O_NOCTTY)
 * and makes it available as descriptor nfd.
 *
 * Returns nfd on success; otherwise the negative value from
 * open_terminal() or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(tty_fd);

        return result;
}
269
270 static bool is_terminal_input(ExecInput i) {
271         return
272                 i == EXEC_INPUT_TTY ||
273                 i == EXEC_INPUT_TTY_FORCE ||
274                 i == EXEC_INPUT_TTY_FAIL;
275 }
276
277 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
278
279         if (is_terminal_input(std_input) && !apply_tty_stdin)
280                 return EXEC_INPUT_NULL;
281
282         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
283                 return EXEC_INPUT_NULL;
284
285         return std_input;
286 }
287
288 static int fixup_output(ExecOutput std_output, int socket_fd) {
289
290         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
291                 return EXEC_OUTPUT_INHERIT;
292
293         return std_output;
294 }
295
296 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
297         ExecInput i;
298
299         assert(context);
300
301         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
302
303         switch (i) {
304
305         case EXEC_INPUT_NULL:
306                 return open_null_as(O_RDONLY, STDIN_FILENO);
307
308         case EXEC_INPUT_TTY:
309         case EXEC_INPUT_TTY_FORCE:
310         case EXEC_INPUT_TTY_FAIL: {
311                 int fd, r;
312
313                 fd = acquire_terminal(tty_path(context),
314                                       i == EXEC_INPUT_TTY_FAIL,
315                                       i == EXEC_INPUT_TTY_FORCE,
316                                       false,
317                                       (usec_t) -1);
318                 if (fd < 0)
319                         return fd;
320
321                 if (fd != STDIN_FILENO) {
322                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
323                         close_nointr_nofail(fd);
324                 } else
325                         r = STDIN_FILENO;
326
327                 return r;
328         }
329
330         case EXEC_INPUT_SOCKET:
331                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
332
333         default:
334                 assert_not_reached("Unknown input type");
335         }
336 }
337
338 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
339         ExecOutput o;
340         ExecInput i;
341         int r;
342
343         assert(context);
344         assert(ident);
345
346         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
347         o = fixup_output(context->std_output, socket_fd);
348
349         if (fileno == STDERR_FILENO) {
350                 ExecOutput e;
351                 e = fixup_output(context->std_error, socket_fd);
352
353                 /* This expects the input and output are already set up */
354
355                 /* Don't change the stderr file descriptor if we inherit all
356                  * the way and are not on a tty */
357                 if (e == EXEC_OUTPUT_INHERIT &&
358                     o == EXEC_OUTPUT_INHERIT &&
359                     i == EXEC_INPUT_NULL &&
360                     !is_terminal_input(context->std_input) &&
361                     getppid () != 1)
362                         return fileno;
363
364                 /* Duplicate from stdout if possible */
365                 if (e == o || e == EXEC_OUTPUT_INHERIT)
366                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
367
368                 o = e;
369
370         } else if (o == EXEC_OUTPUT_INHERIT) {
371                 /* If input got downgraded, inherit the original value */
372                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
373                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
374
375                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
376                 if (i != EXEC_INPUT_NULL)
377                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
378
379                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
380                 if (getppid() != 1)
381                         return fileno;
382
383                 /* We need to open /dev/null here anew, to get the right access mode. */
384                 return open_null_as(O_WRONLY, fileno);
385         }
386
387         switch (o) {
388
389         case EXEC_OUTPUT_NULL:
390                 return open_null_as(O_WRONLY, fileno);
391
392         case EXEC_OUTPUT_TTY:
393                 if (is_terminal_input(i))
394                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
395
396                 /* We don't reset the terminal if this is just about output */
397                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
398
399         case EXEC_OUTPUT_SYSLOG:
400         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
401         case EXEC_OUTPUT_KMSG:
402         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
403         case EXEC_OUTPUT_JOURNAL:
404         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
405                 r = connect_logger_as(context, o, ident, unit_id, fileno);
406                 if (r < 0) {
407                         log_struct_unit(LOG_CRIT, unit_id,
408                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
409                                 fileno == STDOUT_FILENO ? "out" : "err",
410                                 unit_id, strerror(-r),
411                                 "ERRNO=%d", -r,
412                                 NULL);
413                         r = open_null_as(O_WRONLY, fileno);
414                 }
415                 return r;
416
417         case EXEC_OUTPUT_SOCKET:
418                 assert(socket_fd >= 0);
419                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
420
421         default:
422                 assert_not_reached("Unknown error type");
423         }
424 }
425
426 static int chown_terminal(int fd, uid_t uid) {
427         struct stat st;
428
429         assert(fd >= 0);
430
431         /* This might fail. What matters are the results. */
432         (void) fchown(fd, uid, -1);
433         (void) fchmod(fd, TTY_MODE);
434
435         if (fstat(fd, &st) < 0)
436                 return -errno;
437
438         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
439                 return -EPERM;
440
441         return 0;
442 }
443
444 static int setup_confirm_stdio(int *_saved_stdin,
445                                int *_saved_stdout) {
446         int fd = -1, saved_stdin, saved_stdout = -1, r;
447
448         assert(_saved_stdin);
449         assert(_saved_stdout);
450
451         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
452         if (saved_stdin < 0)
453                 return -errno;
454
455         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
456         if (saved_stdout < 0) {
457                 r = errno;
458                 goto fail;
459         }
460
461         fd = acquire_terminal(
462                         "/dev/console",
463                         false,
464                         false,
465                         false,
466                         DEFAULT_CONFIRM_USEC);
467         if (fd < 0) {
468                 r = fd;
469                 goto fail;
470         }
471
472         r = chown_terminal(fd, getuid());
473         if (r < 0)
474                 goto fail;
475
476         if (dup2(fd, STDIN_FILENO) < 0) {
477                 r = -errno;
478                 goto fail;
479         }
480
481         if (dup2(fd, STDOUT_FILENO) < 0) {
482                 r = -errno;
483                 goto fail;
484         }
485
486         if (fd >= 2)
487                 close_nointr_nofail(fd);
488
489         *_saved_stdin = saved_stdin;
490         *_saved_stdout = saved_stdout;
491
492         return 0;
493
494 fail:
495         if (saved_stdout >= 0)
496                 close_nointr_nofail(saved_stdout);
497
498         if (saved_stdin >= 0)
499                 close_nointr_nofail(saved_stdin);
500
501         if (fd >= 0)
502                 close_nointr_nofail(fd);
503
504         return r;
505 }
506
507 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
508         int fd;
509         va_list ap;
510
511         assert(format);
512
513         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
514         if (fd < 0)
515                 return fd;
516
517         va_start(ap, format);
518         vdprintf(fd, format, ap);
519         va_end(ap);
520
521         close_nointr_nofail(fd);
522
523         return 0;
524 }
525
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin/stdout and closes the saved
 * copies. Negative saved descriptors are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. The saved descriptors are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        bool have_stdin, have_stdout;
        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* First put both back ... */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... then get rid of the copies */
        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return result;
}
552
/* Asks on /dev/console whether the command line in argv shall be
 * executed. The character chosen by the user (one of "yns") is stored
 * in *response by ask().
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored afterwards on every path, including the
 * out-of-memory one.
 *
 * Returns the result of ask(), or a negative error code if the console
 * could not be set up or the command line could not be formatted. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdin/stdout pointing at the console and
                 * don't leak the two saved descriptors. */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
572
573 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
574         bool keep_groups = false;
575         int r;
576
577         assert(context);
578
579         /* Lookup and set GID and supplementary group list. Here too
580          * we avoid NSS lookups for gid=0. */
581
582         if (context->group || username) {
583
584                 if (context->group) {
585                         const char *g = context->group;
586
587                         if ((r = get_group_creds(&g, &gid)) < 0)
588                                 return r;
589                 }
590
591                 /* First step, initialize groups from /etc/groups */
592                 if (username && gid != 0) {
593                         if (initgroups(username, gid) < 0)
594                                 return -errno;
595
596                         keep_groups = true;
597                 }
598
599                 /* Second step, set our gids */
600                 if (setresgid(gid, gid, gid) < 0)
601                         return -errno;
602         }
603
604         if (context->supplementary_groups) {
605                 int ngroups_max, k;
606                 gid_t *gids;
607                 char **i;
608
609                 /* Final step, initialize any manually set supplementary groups */
610                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
611
612                 if (!(gids = new(gid_t, ngroups_max)))
613                         return -ENOMEM;
614
615                 if (keep_groups) {
616                         if ((k = getgroups(ngroups_max, gids)) < 0) {
617                                 free(gids);
618                                 return -errno;
619                         }
620                 } else
621                         k = 0;
622
623                 STRV_FOREACH(i, context->supplementary_groups) {
624                         const char *g;
625
626                         if (k >= ngroups_max) {
627                                 free(gids);
628                                 return -E2BIG;
629                         }
630
631                         g = *i;
632                         r = get_group_creds(&g, gids+k);
633                         if (r < 0) {
634                                 free(gids);
635                                 return r;
636                         }
637
638                         k++;
639                 }
640
641                 if (setgroups(k, gids) < 0) {
642                         free(gids);
643                         return -errno;
644                 }
645
646                 free(gids);
647         }
648
649         return 0;
650 }
651
652 static int enforce_user(const ExecContext *context, uid_t uid) {
653         int r;
654         assert(context);
655
656         /* Sets (but doesn't lookup) the uid and make sure we keep the
657          * capabilities while doing so. */
658
659         if (context->capabilities) {
660                 cap_t d;
661                 static const cap_value_t bits[] = {
662                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
663                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
664                 };
665
666                 /* First step: If we need to keep capabilities but
667                  * drop privileges we need to make sure we keep our
668                  * caps, while we drop privileges. */
669                 if (uid != 0) {
670                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
671
672                         if (prctl(PR_GET_SECUREBITS) != sb)
673                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
674                                         return -errno;
675                 }
676
677                 /* Second step: set the capabilities. This will reduce
678                  * the capabilities to the minimum we need. */
679
680                 if (!(d = cap_dup(context->capabilities)))
681                         return -errno;
682
683                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
684                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
685                         r = -errno;
686                         cap_free(d);
687                         return r;
688                 }
689
690                 if (cap_set_proc(d) < 0) {
691                         r = -errno;
692                         cap_free(d);
693                         return r;
694                 }
695
696                 cap_free(d);
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the process to the last path component of path, wrapped in
 * parentheses. Only the trailing 8 characters of the component are
 * used, so that the result never exceeds 10 characters (i.e. the
 * length of "/sbin/init") and looks pretty in /bin/ps. An empty
 * component yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *tail;
        char *out;
        size_t len;

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Keep the end rather than the beginning: it tends to
                 * be the more telling part, as the start might just
                 * be "systemd-". */
                tail += len - 8;
                len = 8;
        }

        out = process_name;

        *out++ = '(';
        memcpy(out, tail, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
938
939 static int apply_seccomp(uint32_t *syscall_filter) {
940         static const struct sock_filter header[] = {
941                 VALIDATE_ARCHITECTURE,
942                 EXAMINE_SYSCALL
943         };
944         static const struct sock_filter footer[] = {
945                 _KILL_PROCESS
946         };
947
948         int i;
949         unsigned n;
950         struct sock_filter *f;
951         struct sock_fprog prog = {};
952
953         assert(syscall_filter);
954
955         /* First: count the syscalls to check for */
956         for (i = 0, n = 0; i < syscall_max(); i++)
957                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
958                         n++;
959
960         /* Second: build the filter program from a header the syscall
961          * matches and the footer */
962         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
963         memcpy(f, header, sizeof(header));
964
965         for (i = 0, n = 0; i < syscall_max(); i++)
966                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
967                         struct sock_filter item[] = {
968                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
969                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
970                         };
971
972                         assert_cc(ELEMENTSOF(item) == 2);
973
974                         f[ELEMENTSOF(header) + 2*n]  = item[0];
975                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
976
977                         n++;
978                 }
979
980         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
981
982         /* Third: install the filter */
983         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
984         prog.filter = f;
985         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
986                 return -errno;
987
988         return 0;
989 }
990
991 static void do_idle_pipe_dance(int idle_pipe[4]) {
992         assert(idle_pipe);
993
994         if (idle_pipe[1] >= 0)
995                 close_nointr_nofail(idle_pipe[1]);
996         if (idle_pipe[2] >= 0)
997                 close_nointr_nofail(idle_pipe[2]);
998
999         if (idle_pipe[0] >= 0) {
1000                 int r;
1001
1002                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1003
1004                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1005                         /* Signal systemd that we are bored and want to continue. */
1006                         write(idle_pipe[3], "x", 1);
1007
1008                         /* Wait for systemd to react to the signal above. */
1009                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1010                 }
1011
1012                 close_nointr_nofail(idle_pipe[0]);
1013
1014         }
1015
1016         if (idle_pipe[3] >= 0)
1017                 close_nointr_nofail(idle_pipe[3]);
1018 }
1019
1020 int exec_spawn(ExecCommand *command,
1021                char **argv,
1022                ExecContext *context,
1023                int fds[], unsigned n_fds,
1024                char **environment,
1025                bool apply_permissions,
1026                bool apply_chroot,
1027                bool apply_tty_stdin,
1028                bool confirm_spawn,
1029                CGroupControllerMask cgroup_supported,
1030                const char *cgroup_path,
1031                const char *unit_id,
1032                int idle_pipe[4],
1033                ExecRuntime *runtime,
1034                pid_t *ret) {
1035
1036         _cleanup_strv_free_ char **files_env = NULL;
1037         int socket_fd;
1038         char *line;
1039         pid_t pid;
1040         int r;
1041
1042         assert(command);
1043         assert(context);
1044         assert(ret);
1045         assert(fds || n_fds <= 0);
1046
1047         if (context->std_input == EXEC_INPUT_SOCKET ||
1048             context->std_output == EXEC_OUTPUT_SOCKET ||
1049             context->std_error == EXEC_OUTPUT_SOCKET) {
1050
1051                 if (n_fds != 1)
1052                         return -EINVAL;
1053
1054                 socket_fd = fds[0];
1055
1056                 fds = NULL;
1057                 n_fds = 0;
1058         } else
1059                 socket_fd = -1;
1060
1061         r = exec_context_load_environment(context, &files_env);
1062         if (r < 0) {
1063                 log_struct_unit(LOG_ERR,
1064                            unit_id,
1065                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1066                            "ERRNO=%d", -r,
1067                            NULL);
1068                 return r;
1069         }
1070
1071         if (!argv)
1072                 argv = command->argv;
1073
1074         line = exec_command_line(argv);
1075         if (!line)
1076                 return log_oom();
1077
1078         log_struct_unit(LOG_DEBUG,
1079                         unit_id,
1080                         "EXECUTABLE=%s", command->path,
1081                         "MESSAGE=About to execute: %s", line,
1082                         NULL);
1083         free(line);
1084
1085         pid = fork();
1086         if (pid < 0)
1087                 return -errno;
1088
1089         if (pid == 0) {
1090                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1091                 const char *username = NULL, *home = NULL, *shell = NULL;
1092                 unsigned n_dont_close = 0, n_env = 0;
1093                 int dont_close[n_fds + 3];
1094                 uid_t uid = (uid_t) -1;
1095                 gid_t gid = (gid_t) -1;
1096                 sigset_t ss;
1097                 int i, err;
1098
1099                 /* child */
1100
1101                 rename_process_from_path(command->path);
1102
1103                 /* We reset exactly these signals, since they are the
1104                  * only ones we set to SIG_IGN in the main daemon. All
1105                  * others we leave untouched because we set them to
1106                  * SIG_DFL or a valid handler initially, both of which
1107                  * will be demoted to SIG_DFL. */
1108                 default_signals(SIGNALS_CRASH_HANDLER,
1109                                 SIGNALS_IGNORE, -1);
1110
1111                 if (context->ignore_sigpipe)
1112                         ignore_signals(SIGPIPE, -1);
1113
1114                 assert_se(sigemptyset(&ss) == 0);
1115                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1116                         err = -errno;
1117                         r = EXIT_SIGNAL_MASK;
1118                         goto fail_child;
1119                 }
1120
1121                 if (idle_pipe)
1122                         do_idle_pipe_dance(idle_pipe);
1123
1124                 /* Close sockets very early to make sure we don't
1125                  * block init reexecution because it cannot bind its
1126                  * sockets */
1127                 log_forget_fds();
1128
1129                 if (socket_fd >= 0)
1130                         dont_close[n_dont_close++] = socket_fd;
1131                 if (n_fds > 0) {
1132                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1133                         n_dont_close += n_fds;
1134                 }
1135                 if (runtime) {
1136                         if (runtime->netns_storage_socket[0] >= 0)
1137                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1138                         if (runtime->netns_storage_socket[1] >= 0)
1139                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1140                 }
1141
1142                 err = close_all_fds(dont_close, n_dont_close);
1143                 if (err < 0) {
1144                         r = EXIT_FDS;
1145                         goto fail_child;
1146                 }
1147
1148                 if (!context->same_pgrp)
1149                         if (setsid() < 0) {
1150                                 err = -errno;
1151                                 r = EXIT_SETSID;
1152                                 goto fail_child;
1153                         }
1154
1155                 if (context->tcpwrap_name) {
1156                         if (socket_fd >= 0)
1157                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1158                                         err = -EACCES;
1159                                         r = EXIT_TCPWRAP;
1160                                         goto fail_child;
1161                                 }
1162
1163                         for (i = 0; i < (int) n_fds; i++) {
1164                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1165                                         err = -EACCES;
1166                                         r = EXIT_TCPWRAP;
1167                                         goto fail_child;
1168                                 }
1169                         }
1170                 }
1171
1172                 exec_context_tty_reset(context);
1173
1174                 if (confirm_spawn) {
1175                         char response;
1176
1177                         err = ask_for_confirmation(&response, argv);
1178                         if (err == -ETIMEDOUT)
1179                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1180                         else if (err < 0)
1181                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1182                         else if (response == 's') {
1183                                 write_confirm_message("Skipping execution.\n");
1184                                 err = -ECANCELED;
1185                                 r = EXIT_CONFIRM;
1186                                 goto fail_child;
1187                         } else if (response == 'n') {
1188                                 write_confirm_message("Failing execution.\n");
1189                                 err = r = 0;
1190                                 goto fail_child;
1191                         }
1192                 }
1193
1194                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1195                  * must sure to drop O_NONBLOCK */
1196                 if (socket_fd >= 0)
1197                         fd_nonblock(socket_fd, false);
1198
1199                 err = setup_input(context, socket_fd, apply_tty_stdin);
1200                 if (err < 0) {
1201                         r = EXIT_STDIN;
1202                         goto fail_child;
1203                 }
1204
1205                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1206                 if (err < 0) {
1207                         r = EXIT_STDOUT;
1208                         goto fail_child;
1209                 }
1210
1211                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1212                 if (err < 0) {
1213                         r = EXIT_STDERR;
1214                         goto fail_child;
1215                 }
1216
1217                 if (cgroup_path) {
1218                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1219                         if (err < 0) {
1220                                 r = EXIT_CGROUP;
1221                                 goto fail_child;
1222                         }
1223                 }
1224
1225                 if (context->oom_score_adjust_set) {
1226                         char t[16];
1227
1228                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1229                         char_array_0(t);
1230
1231                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1232                                 err = -errno;
1233                                 r = EXIT_OOM_ADJUST;
1234                                 goto fail_child;
1235                         }
1236                 }
1237
1238                 if (context->nice_set)
1239                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1240                                 err = -errno;
1241                                 r = EXIT_NICE;
1242                                 goto fail_child;
1243                         }
1244
1245                 if (context->cpu_sched_set) {
1246                         struct sched_param param = {
1247                                 .sched_priority = context->cpu_sched_priority,
1248                         };
1249
1250                         r = sched_setscheduler(0,
1251                                                context->cpu_sched_policy |
1252                                                (context->cpu_sched_reset_on_fork ?
1253                                                 SCHED_RESET_ON_FORK : 0),
1254                                                &param);
1255                         if (r < 0) {
1256                                 err = -errno;
1257                                 r = EXIT_SETSCHEDULER;
1258                                 goto fail_child;
1259                         }
1260                 }
1261
1262                 if (context->cpuset)
1263                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1264                                 err = -errno;
1265                                 r = EXIT_CPUAFFINITY;
1266                                 goto fail_child;
1267                         }
1268
1269                 if (context->ioprio_set)
1270                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1271                                 err = -errno;
1272                                 r = EXIT_IOPRIO;
1273                                 goto fail_child;
1274                         }
1275
1276                 if (context->timer_slack_nsec != (nsec_t) -1)
1277                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1278                                 err = -errno;
1279                                 r = EXIT_TIMERSLACK;
1280                                 goto fail_child;
1281                         }
1282
1283                 if (context->utmp_id)
1284                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1285
1286                 if (context->user) {
1287                         username = context->user;
1288                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1289                         if (err < 0) {
1290                                 r = EXIT_USER;
1291                                 goto fail_child;
1292                         }
1293
1294                         if (is_terminal_input(context->std_input)) {
1295                                 err = chown_terminal(STDIN_FILENO, uid);
1296                                 if (err < 0) {
1297                                         r = EXIT_STDIN;
1298                                         goto fail_child;
1299                                 }
1300                         }
1301                 }
1302
1303 #ifdef HAVE_PAM
1304                 if (cgroup_path && context->user && context->pam_name) {
1305                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1306                         if (err < 0) {
1307                                 r = EXIT_CGROUP;
1308                                 goto fail_child;
1309                         }
1310
1311
1312                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1313                         if (err < 0) {
1314                                 r = EXIT_CGROUP;
1315                                 goto fail_child;
1316                         }
1317                 }
1318 #endif
1319
1320                 if (apply_permissions) {
1321                         err = enforce_groups(context, username, gid);
1322                         if (err < 0) {
1323                                 r = EXIT_GROUP;
1324                                 goto fail_child;
1325                         }
1326                 }
1327
1328                 umask(context->umask);
1329
1330 #ifdef HAVE_PAM
1331                 if (apply_permissions && context->pam_name && username) {
1332                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1333                         if (err < 0) {
1334                                 r = EXIT_PAM;
1335                                 goto fail_child;
1336                         }
1337                 }
1338 #endif
1339                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1340                         err = setup_netns(runtime->netns_storage_socket);
1341                         if (err < 0) {
1342                                 r = EXIT_NETWORK;
1343                                 goto fail_child;
1344                         }
1345                 }
1346
1347                 if (!strv_isempty(context->read_write_dirs) ||
1348                     !strv_isempty(context->read_only_dirs) ||
1349                     !strv_isempty(context->inaccessible_dirs) ||
1350                     context->mount_flags != 0 ||
1351                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) {
1352
1353                         char *tmp = NULL, *var = NULL;
1354
1355                         /* The runtime struct only contains the parent
1356                          * of the private /tmp, which is
1357                          * non-accessible to world users. Inside of it
1358                          * there's a /tmp that is sticky, and that's
1359                          * the one we want to use here. */
1360
1361                         if (context->private_tmp && runtime) {
1362                                 if (runtime->tmp_dir)
1363                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1364                                 if (runtime->var_tmp_dir)
1365                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1366                         }
1367
1368                         err = setup_namespace(
1369                                         context->read_write_dirs,
1370                                         context->read_only_dirs,
1371                                         context->inaccessible_dirs,
1372                                         tmp,
1373                                         var,
1374                                         context->mount_flags);
1375
1376                         if (err < 0) {
1377                                 r = EXIT_NAMESPACE;
1378                                 goto fail_child;
1379                         }
1380                 }
1381
1382                 if (apply_chroot) {
1383                         if (context->root_directory)
1384                                 if (chroot(context->root_directory) < 0) {
1385                                         err = -errno;
1386                                         r = EXIT_CHROOT;
1387                                         goto fail_child;
1388                                 }
1389
1390                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1391                                 err = -errno;
1392                                 r = EXIT_CHDIR;
1393                                 goto fail_child;
1394                         }
1395                 } else {
1396                         _cleanup_free_ char *d = NULL;
1397
1398                         if (asprintf(&d, "%s/%s",
1399                                      context->root_directory ? context->root_directory : "",
1400                                      context->working_directory ? context->working_directory : "") < 0) {
1401                                 err = -ENOMEM;
1402                                 r = EXIT_MEMORY;
1403                                 goto fail_child;
1404                         }
1405
1406                         if (chdir(d) < 0) {
1407                                 err = -errno;
1408                                 r = EXIT_CHDIR;
1409                                 goto fail_child;
1410                         }
1411                 }
1412
1413                 /* We repeat the fd closing here, to make sure that
1414                  * nothing is leaked from the PAM modules */
1415                 err = close_all_fds(fds, n_fds);
1416                 if (err >= 0)
1417                         err = shift_fds(fds, n_fds);
1418                 if (err >= 0)
1419                         err = flags_fds(fds, n_fds, context->non_blocking);
1420                 if (err < 0) {
1421                         r = EXIT_FDS;
1422                         goto fail_child;
1423                 }
1424
1425                 if (apply_permissions) {
1426
1427                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1428                                 if (!context->rlimit[i])
1429                                         continue;
1430
1431                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1432                                         err = -errno;
1433                                         r = EXIT_LIMITS;
1434                                         goto fail_child;
1435                                 }
1436                         }
1437
1438                         if (context->capability_bounding_set_drop) {
1439                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1440                                 if (err < 0) {
1441                                         r = EXIT_CAPABILITIES;
1442                                         goto fail_child;
1443                                 }
1444                         }
1445
1446                         if (context->user) {
1447                                 err = enforce_user(context, uid);
1448                                 if (err < 0) {
1449                                         r = EXIT_USER;
1450                                         goto fail_child;
1451                                 }
1452                         }
1453
1454                         /* PR_GET_SECUREBITS is not privileged, while
1455                          * PR_SET_SECUREBITS is. So to suppress
1456                          * potential EPERMs we'll try not to call
1457                          * PR_SET_SECUREBITS unless necessary. */
1458                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1459                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1460                                         err = -errno;
1461                                         r = EXIT_SECUREBITS;
1462                                         goto fail_child;
1463                                 }
1464
1465                         if (context->capabilities)
1466                                 if (cap_set_proc(context->capabilities) < 0) {
1467                                         err = -errno;
1468                                         r = EXIT_CAPABILITIES;
1469                                         goto fail_child;
1470                                 }
1471
1472                         if (context->no_new_privileges)
1473                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1474                                         err = -errno;
1475                                         r = EXIT_NO_NEW_PRIVILEGES;
1476                                         goto fail_child;
1477                                 }
1478
1479                         if (context->syscall_filter) {
1480                                 err = apply_seccomp(context->syscall_filter);
1481                                 if (err < 0) {
1482                                         r = EXIT_SECCOMP;
1483                                         goto fail_child;
1484                                 }
1485                         }
1486                 }
1487
1488                 our_env = new(char*, 8);
1489                 if (!our_env ||
1490                     (n_fds > 0 && (
1491                             asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1492                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0)) ||
1493                     (home && asprintf(our_env + n_env++, "HOME=%s", home) < 0) ||
1494                     (username && (
1495                             asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1496                             asprintf(our_env + n_env++, "USER=%s", username) < 0)) ||
1497                     (shell && asprintf(our_env + n_env++, "SHELL=%s", shell) < 0) ||
1498                     ((is_terminal_input(context->std_input) ||
1499                       context->std_output == EXEC_OUTPUT_TTY ||
1500                       context->std_error == EXEC_OUTPUT_TTY) && (
1501                               !(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))))) {
1502
1503                         err = -ENOMEM;
1504                         r = EXIT_MEMORY;
1505                         goto fail_child;
1506                 }
1507
1508                 our_env[n_env++] = NULL;
1509                 assert(n_env <= 8);
1510
1511                 final_env = strv_env_merge(5,
1512                                            environment,
1513                                            our_env,
1514                                            context->environment,
1515                                            files_env,
1516                                            pam_env,
1517                                            NULL);
1518                 if (!final_env) {
1519                         err = -ENOMEM;
1520                         r = EXIT_MEMORY;
1521                         goto fail_child;
1522                 }
1523
1524                 final_argv = replace_env_argv(argv, final_env);
1525                 if (!final_argv) {
1526                         err = -ENOMEM;
1527                         r = EXIT_MEMORY;
1528                         goto fail_child;
1529                 }
1530
1531                 final_env = strv_env_clean(final_env);
1532
1533                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1534                         line = exec_command_line(final_argv);
1535                         if (line) {
1536                                 log_open();
1537                                 log_struct_unit(LOG_DEBUG,
1538                                                 unit_id,
1539                                                 "EXECUTABLE=%s", command->path,
1540                                                 "MESSAGE=Executing: %s", line,
1541                                                 NULL);
1542                                 log_close();
1543                                 free(line);
1544                                 line = NULL;
1545                         }
1546                 }
1547                 execve(command->path, final_argv, final_env);
1548                 err = -errno;
1549                 r = EXIT_EXEC;
1550
1551         fail_child:
1552                 if (r != 0) {
1553                         log_open();
1554                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1555                                    "EXECUTABLE=%s", command->path,
1556                                    "MESSAGE=Failed at step %s spawning %s: %s",
1557                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1558                                           command->path, strerror(-err),
1559                                    "ERRNO=%d", -err,
1560                                    NULL);
1561                         log_close();
1562                 }
1563
1564                 _exit(r);
1565         }
1566
1567         log_struct_unit(LOG_DEBUG,
1568                         unit_id,
1569                         "MESSAGE=Forked %s as %lu",
1570                         command->path, (unsigned long) pid,
1571                         NULL);
1572
1573         /* We add the new process to the cgroup both in the child (so
1574          * that we can be sure that no user code is ever executed
1575          * outside of the cgroup) and in the parent (so that we can be
1576          * sure that when we kill the cgroup the process will be
1577          * killed too). */
1578         if (cgroup_path)
1579                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1580
1581         exec_status_start(&command->exec_status, pid);
1582
1583         *ret = pid;
1584         return 0;
1585 }
1586
1587 void exec_context_init(ExecContext *c) {
1588         assert(c);
1589
1590         c->umask = 0022;
1591         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1592         c->cpu_sched_policy = SCHED_OTHER;
1593         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1594         c->syslog_level_prefix = true;
1595         c->ignore_sigpipe = true;
1596         c->timer_slack_nsec = (nsec_t) -1;
1597 }
1598
1599 void exec_context_done(ExecContext *c) {
1600         unsigned l;
1601
1602         assert(c);
1603
1604         strv_free(c->environment);
1605         c->environment = NULL;
1606
1607         strv_free(c->environment_files);
1608         c->environment_files = NULL;
1609
1610         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1611                 free(c->rlimit[l]);
1612                 c->rlimit[l] = NULL;
1613         }
1614
1615         free(c->working_directory);
1616         c->working_directory = NULL;
1617         free(c->root_directory);
1618         c->root_directory = NULL;
1619
1620         free(c->tty_path);
1621         c->tty_path = NULL;
1622
1623         free(c->tcpwrap_name);
1624         c->tcpwrap_name = NULL;
1625
1626         free(c->syslog_identifier);
1627         c->syslog_identifier = NULL;
1628
1629         free(c->user);
1630         c->user = NULL;
1631
1632         free(c->group);
1633         c->group = NULL;
1634
1635         strv_free(c->supplementary_groups);
1636         c->supplementary_groups = NULL;
1637
1638         free(c->pam_name);
1639         c->pam_name = NULL;
1640
1641         if (c->capabilities) {
1642                 cap_free(c->capabilities);
1643                 c->capabilities = NULL;
1644         }
1645
1646         strv_free(c->read_only_dirs);
1647         c->read_only_dirs = NULL;
1648
1649         strv_free(c->read_write_dirs);
1650         c->read_write_dirs = NULL;
1651
1652         strv_free(c->inaccessible_dirs);
1653         c->inaccessible_dirs = NULL;
1654
1655         if (c->cpuset)
1656                 CPU_FREE(c->cpuset);
1657
1658         free(c->utmp_id);
1659         c->utmp_id = NULL;
1660
1661         free(c->syscall_filter);
1662         c->syscall_filter = NULL;
1663 }
1664
1665 void exec_command_done(ExecCommand *c) {
1666         assert(c);
1667
1668         free(c->path);
1669         c->path = NULL;
1670
1671         strv_free(c->argv);
1672         c->argv = NULL;
1673 }
1674
1675 void exec_command_done_array(ExecCommand *c, unsigned n) {
1676         unsigned i;
1677
1678         for (i = 0; i < n; i++)
1679                 exec_command_done(c+i);
1680 }
1681
1682 void exec_command_free_list(ExecCommand *c) {
1683         ExecCommand *i;
1684
1685         while ((i = c)) {
1686                 LIST_REMOVE(command, c, i);
1687                 exec_command_done(i);
1688                 free(i);
1689         }
1690 }
1691
1692 void exec_command_free_array(ExecCommand **c, unsigned n) {
1693         unsigned i;
1694
1695         for (i = 0; i < n; i++) {
1696                 exec_command_free_list(c[i]);
1697                 c[i] = NULL;
1698         }
1699 }
1700
1701 int exec_context_load_environment(const ExecContext *c, char ***l) {
1702         char **i, **r = NULL;
1703
1704         assert(c);
1705         assert(l);
1706
1707         STRV_FOREACH(i, c->environment_files) {
1708                 char *fn;
1709                 int k;
1710                 bool ignore = false;
1711                 char **p;
1712                 _cleanup_globfree_ glob_t pglob = {};
1713                 int count, n;
1714
1715                 fn = *i;
1716
1717                 if (fn[0] == '-') {
1718                         ignore = true;
1719                         fn ++;
1720                 }
1721
1722                 if (!path_is_absolute(fn)) {
1723                         if (ignore)
1724                                 continue;
1725
1726                         strv_free(r);
1727                         return -EINVAL;
1728                 }
1729
1730                 /* Filename supports globbing, take all matching files */
1731                 errno = 0;
1732                 if (glob(fn, 0, NULL, &pglob) != 0) {
1733                         if (ignore)
1734                                 continue;
1735
1736                         strv_free(r);
1737                         return errno ? -errno : -EINVAL;
1738                 }
1739                 count = pglob.gl_pathc;
1740                 if (count == 0) {
1741                         if (ignore)
1742                                 continue;
1743
1744                         strv_free(r);
1745                         return -EINVAL;
1746                 }
1747                 for (n = 0; n < count; n++) {
1748                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1749                         if (k < 0) {
1750                                 if (ignore)
1751                                         continue;
1752
1753                                 strv_free(r);
1754                                 return k;
1755                         }
1756                         /* Log invalid environment variables with filename */
1757                         if (p)
1758                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1759
1760                         if (r == NULL)
1761                                 r = p;
1762                         else {
1763                                 char **m;
1764
1765                                 m = strv_env_merge(2, r, p);
1766                                 strv_free(r);
1767                                 strv_free(p);
1768                                 if (!m)
1769                                         return -ENOMEM;
1770
1771                                 r = m;
1772                         }
1773                 }
1774         }
1775
1776         *l = r;
1777
1778         return 0;
1779 }
1780
/* Returns true if the given tty (with or without a "/dev/" prefix) may be
 * the device behind /dev/console. When the console cannot be resolved the
 * answer is conservatively true. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active_line = NULL, *console_name;
        bool may_match;

        if (startswith(tty, "/dev/"))
                tty += 5;

        /* trivial identity? */
        if (streq(tty, "console"))
                return true;

        /* if we could not resolve, assume it may */
        console_name = resolve_dev_console(&active_line);
        if (!console_name)
                return true;

        if (streq(console_name, tty))
                may_match = true;
        else
                /* "tty0" means the active VC, so it may be the same sometimes */
                may_match = streq(console_name, "tty0") && tty_is_vc(tty);

        free(active_line);

        return may_match;
}
1803
1804 bool exec_context_may_touch_console(ExecContext *ec) {
1805         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1806                 is_terminal_input(ec->std_input) ||
1807                 is_terminal_output(ec->std_output) ||
1808                 is_terminal_output(ec->std_error)) &&
1809                tty_may_match_dev_console(tty_path(ec));
1810 }
1811
/* Writes every string of the NULL-terminated list l to f, each one preceded
 * by a single space. A NULL list writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        for (item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
1820
1821 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1822         char **e;
1823         unsigned i;
1824
1825         assert(c);
1826         assert(f);
1827
1828         prefix = strempty(prefix);
1829
1830         fprintf(f,
1831                 "%sUMask: %04o\n"
1832                 "%sWorkingDirectory: %s\n"
1833                 "%sRootDirectory: %s\n"
1834                 "%sNonBlocking: %s\n"
1835                 "%sPrivateTmp: %s\n"
1836                 "%sPrivateNetwork: %s\n"
1837                 "%sIgnoreSIGPIPE: %s\n",
1838                 prefix, c->umask,
1839                 prefix, c->working_directory ? c->working_directory : "/",
1840                 prefix, c->root_directory ? c->root_directory : "/",
1841                 prefix, yes_no(c->non_blocking),
1842                 prefix, yes_no(c->private_tmp),
1843                 prefix, yes_no(c->private_network),
1844                 prefix, yes_no(c->ignore_sigpipe));
1845
1846         STRV_FOREACH(e, c->environment)
1847                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1848
1849         STRV_FOREACH(e, c->environment_files)
1850                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1851
1852         if (c->tcpwrap_name)
1853                 fprintf(f,
1854                         "%sTCPWrapName: %s\n",
1855                         prefix, c->tcpwrap_name);
1856
1857         if (c->nice_set)
1858                 fprintf(f,
1859                         "%sNice: %i\n",
1860                         prefix, c->nice);
1861
1862         if (c->oom_score_adjust_set)
1863                 fprintf(f,
1864                         "%sOOMScoreAdjust: %i\n",
1865                         prefix, c->oom_score_adjust);
1866
1867         for (i = 0; i < RLIM_NLIMITS; i++)
1868                 if (c->rlimit[i])
1869                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1870
1871         if (c->ioprio_set) {
1872                 char *class_str;
1873                 int r;
1874
1875                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1876                 if (r < 0)
1877                         class_str = NULL;
1878                 fprintf(f,
1879                         "%sIOSchedulingClass: %s\n"
1880                         "%sIOPriority: %i\n",
1881                         prefix, strna(class_str),
1882                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1883                 free(class_str);
1884         }
1885
1886         if (c->cpu_sched_set) {
1887                 char *policy_str;
1888                 int r;
1889
1890                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1891                 if (r < 0)
1892                         policy_str = NULL;
1893                 fprintf(f,
1894                         "%sCPUSchedulingPolicy: %s\n"
1895                         "%sCPUSchedulingPriority: %i\n"
1896                         "%sCPUSchedulingResetOnFork: %s\n",
1897                         prefix, strna(policy_str),
1898                         prefix, c->cpu_sched_priority,
1899                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1900                 free(policy_str);
1901         }
1902
1903         if (c->cpuset) {
1904                 fprintf(f, "%sCPUAffinity:", prefix);
1905                 for (i = 0; i < c->cpuset_ncpus; i++)
1906                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1907                                 fprintf(f, " %i", i);
1908                 fputs("\n", f);
1909         }
1910
1911         if (c->timer_slack_nsec != (nsec_t) -1)
1912                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1913
1914         fprintf(f,
1915                 "%sStandardInput: %s\n"
1916                 "%sStandardOutput: %s\n"
1917                 "%sStandardError: %s\n",
1918                 prefix, exec_input_to_string(c->std_input),
1919                 prefix, exec_output_to_string(c->std_output),
1920                 prefix, exec_output_to_string(c->std_error));
1921
1922         if (c->tty_path)
1923                 fprintf(f,
1924                         "%sTTYPath: %s\n"
1925                         "%sTTYReset: %s\n"
1926                         "%sTTYVHangup: %s\n"
1927                         "%sTTYVTDisallocate: %s\n",
1928                         prefix, c->tty_path,
1929                         prefix, yes_no(c->tty_reset),
1930                         prefix, yes_no(c->tty_vhangup),
1931                         prefix, yes_no(c->tty_vt_disallocate));
1932
1933         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1934             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1935             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1936             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1937                 char *fac_str, *lvl_str;
1938                 int r;
1939
1940                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1941                 if (r < 0)
1942                         fac_str = NULL;
1943
1944                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1945                 if (r < 0)
1946                         lvl_str = NULL;
1947
1948                 fprintf(f,
1949                         "%sSyslogFacility: %s\n"
1950                         "%sSyslogLevel: %s\n",
1951                         prefix, strna(fac_str),
1952                         prefix, strna(lvl_str));
1953                 free(lvl_str);
1954                 free(fac_str);
1955         }
1956
1957         if (c->capabilities) {
1958                 char *t;
1959                 if ((t = cap_to_text(c->capabilities, NULL))) {
1960                         fprintf(f, "%sCapabilities: %s\n",
1961                                 prefix, t);
1962                         cap_free(t);
1963                 }
1964         }
1965
1966         if (c->secure_bits)
1967                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1968                         prefix,
1969                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
1970                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1971                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1972                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1973                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
1974                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1975
1976         if (c->capability_bounding_set_drop) {
1977                 unsigned long l;
1978                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1979
1980                 for (l = 0; l <= cap_last_cap(); l++)
1981                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1982                                 char *t;
1983
1984                                 if ((t = cap_to_name(l))) {
1985                                         fprintf(f, " %s", t);
1986                                         cap_free(t);
1987                                 }
1988                         }
1989
1990                 fputs("\n", f);
1991         }
1992
1993         if (c->user)
1994                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1995         if (c->group)
1996                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1997
1998         if (strv_length(c->supplementary_groups) > 0) {
1999                 fprintf(f, "%sSupplementaryGroups:", prefix);
2000                 strv_fprintf(f, c->supplementary_groups);
2001                 fputs("\n", f);
2002         }
2003
2004         if (c->pam_name)
2005                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2006
2007         if (strv_length(c->read_write_dirs) > 0) {
2008                 fprintf(f, "%sReadWriteDirs:", prefix);
2009                 strv_fprintf(f, c->read_write_dirs);
2010                 fputs("\n", f);
2011         }
2012
2013         if (strv_length(c->read_only_dirs) > 0) {
2014                 fprintf(f, "%sReadOnlyDirs:", prefix);
2015                 strv_fprintf(f, c->read_only_dirs);
2016                 fputs("\n", f);
2017         }
2018
2019         if (strv_length(c->inaccessible_dirs) > 0) {
2020                 fprintf(f, "%sInaccessibleDirs:", prefix);
2021                 strv_fprintf(f, c->inaccessible_dirs);
2022                 fputs("\n", f);
2023         }
2024
2025         if (c->utmp_id)
2026                 fprintf(f,
2027                         "%sUtmpIdentifier: %s\n",
2028                         prefix, c->utmp_id);
2029 }
2030
2031 void exec_status_start(ExecStatus *s, pid_t pid) {
2032         assert(s);
2033
2034         zero(*s);
2035         s->pid = pid;
2036         dual_timestamp_get(&s->start_timestamp);
2037 }
2038
2039 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2040         assert(s);
2041
2042         if (s->pid && s->pid != pid)
2043                 zero(*s);
2044
2045         s->pid = pid;
2046         dual_timestamp_get(&s->exit_timestamp);
2047
2048         s->code = code;
2049         s->status = status;
2050
2051         if (context) {
2052                 if (context->utmp_id)
2053                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2054
2055                 exec_context_tty_reset(context);
2056         }
2057 }
2058
2059 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2060         char buf[FORMAT_TIMESTAMP_MAX];
2061
2062         assert(s);
2063         assert(f);
2064
2065         if (!prefix)
2066                 prefix = "";
2067
2068         if (s->pid <= 0)
2069                 return;
2070
2071         fprintf(f,
2072                 "%sPID: %lu\n",
2073                 prefix, (unsigned long) s->pid);
2074
2075         if (s->start_timestamp.realtime > 0)
2076                 fprintf(f,
2077                         "%sStart Timestamp: %s\n",
2078                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2079
2080         if (s->exit_timestamp.realtime > 0)
2081                 fprintf(f,
2082                         "%sExit Timestamp: %s\n"
2083                         "%sExit Code: %s\n"
2084                         "%sExit Status: %i\n",
2085                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2086                         prefix, sigchld_code_to_string(s->code),
2087                         prefix, s->status);
2088 }
2089
2090 char *exec_command_line(char **argv) {
2091         size_t k;
2092         char *n, *p, **a;
2093         bool first = true;
2094
2095         assert(argv);
2096
2097         k = 1;
2098         STRV_FOREACH(a, argv)
2099                 k += strlen(*a)+3;
2100
2101         if (!(n = new(char, k)))
2102                 return NULL;
2103
2104         p = n;
2105         STRV_FOREACH(a, argv) {
2106
2107                 if (!first)
2108                         *(p++) = ' ';
2109                 else
2110                         first = false;
2111
2112                 if (strpbrk(*a, WHITESPACE)) {
2113                         *(p++) = '\'';
2114                         p = stpcpy(p, *a);
2115                         *(p++) = '\'';
2116                 } else
2117                         p = stpcpy(p, *a);
2118
2119         }
2120
2121         *p = 0;
2122
2123         /* FIXME: this doesn't really handle arguments that have
2124          * spaces and ticks in them */
2125
2126         return n;
2127 }
2128
2129 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2130         char *p2;
2131         const char *prefix2;
2132
2133         char *cmd;
2134
2135         assert(c);
2136         assert(f);
2137
2138         if (!prefix)
2139                 prefix = "";
2140         p2 = strappend(prefix, "\t");
2141         prefix2 = p2 ? p2 : prefix;
2142
2143         cmd = exec_command_line(c->argv);
2144
2145         fprintf(f,
2146                 "%sCommand Line: %s\n",
2147                 prefix, cmd ? cmd : strerror(ENOMEM));
2148
2149         free(cmd);
2150
2151         exec_status_dump(&c->exec_status, f, prefix2);
2152
2153         free(p2);
2154 }
2155
2156 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2157         assert(f);
2158
2159         if (!prefix)
2160                 prefix = "";
2161
2162         LIST_FOREACH(command, c, c)
2163                 exec_command_dump(c, f, prefix);
2164 }
2165
2166 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2167         ExecCommand *end;
2168
2169         assert(l);
2170         assert(e);
2171
2172         if (*l) {
2173                 /* It's kind of important, that we keep the order here */
2174                 LIST_FIND_TAIL(command, *l, end);
2175                 LIST_INSERT_AFTER(command, *l, end, e);
2176         } else
2177               *l = e;
2178 }
2179
2180 int exec_command_set(ExecCommand *c, const char *path, ...) {
2181         va_list ap;
2182         char **l, *p;
2183
2184         assert(c);
2185         assert(path);
2186
2187         va_start(ap, path);
2188         l = strv_new_ap(path, ap);
2189         va_end(ap);
2190
2191         if (!l)
2192                 return -ENOMEM;
2193
2194         p = strdup(path);
2195         if (!p) {
2196                 strv_free(l);
2197                 return -ENOMEM;
2198         }
2199
2200         free(c->path);
2201         c->path = p;
2202
2203         strv_free(c->argv);
2204         c->argv = l;
2205
2206         return 0;
2207 }
2208
2209 static int exec_runtime_allocate(ExecRuntime **rt) {
2210
2211         if (*rt)
2212                 return 0;
2213
2214         *rt = new0(ExecRuntime, 1);
2215         if (!rt)
2216                 return -ENOMEM;
2217
2218         (*rt)->n_ref = 1;
2219         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2220
2221         return 0;
2222 }
2223
2224 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2225         int r;
2226
2227         assert(rt);
2228         assert(c);
2229         assert(id);
2230
2231         if (*rt)
2232                 return 1;
2233
2234         if (!c->private_network && !c->private_tmp)
2235                 return 0;
2236
2237         r = exec_runtime_allocate(rt);
2238         if (r < 0)
2239                 return r;
2240
2241         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2242                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2243                         return -errno;
2244         }
2245
2246         if (c->private_tmp && !(*rt)->tmp_dir) {
2247                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2248                 if (r < 0)
2249                         return r;
2250         }
2251
2252         return 1;
2253 }
2254
2255 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2256         assert(r);
2257         assert(r->n_ref > 0);
2258
2259         r->n_ref++;
2260         return r;
2261 }
2262
2263 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2264
2265         if (!r)
2266                 return NULL;
2267
2268         assert(r->n_ref > 0);
2269
2270         r->n_ref--;
2271         if (r->n_ref <= 0) {
2272                 free(r->tmp_dir);
2273                 free(r->var_tmp_dir);
2274                 close_pipe(r->netns_storage_socket);
2275                 free(r);
2276         }
2277
2278         return NULL;
2279 }
2280
2281 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2282         assert(u);
2283         assert(f);
2284         assert(fds);
2285
2286         if (!rt)
2287                 return 0;
2288
2289         if (rt->tmp_dir)
2290                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2291
2292         if (rt->var_tmp_dir)
2293                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2294
2295         if (rt->netns_storage_socket[0] >= 0) {
2296                 int copy;
2297
2298                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2299                 if (copy < 0)
2300                         return copy;
2301
2302                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2303         }
2304
2305         if (rt->netns_storage_socket[1] >= 0) {
2306                 int copy;
2307
2308                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2309                 if (copy < 0)
2310                         return copy;
2311
2312                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2313         }
2314
2315         return 0;
2316 }
2317
2318 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2319         int r;
2320
2321         assert(rt);
2322         assert(key);
2323         assert(value);
2324
2325         if (streq(key, "tmp-dir")) {
2326                 char *copy;
2327
2328                 r = exec_runtime_allocate(rt);
2329                 if (r < 0)
2330                         return r;
2331
2332                 copy = strdup(value);
2333                 if (!copy)
2334                         return log_oom();
2335
2336                 free((*rt)->tmp_dir);
2337                 (*rt)->tmp_dir = copy;
2338
2339         } else if (streq(key, "var-tmp-dir")) {
2340                 char *copy;
2341
2342                 r = exec_runtime_allocate(rt);
2343                 if (r < 0)
2344                         return r;
2345
2346                 copy = strdup(value);
2347                 if (!copy)
2348                         return log_oom();
2349
2350                 free((*rt)->var_tmp_dir);
2351                 (*rt)->var_tmp_dir = copy;
2352
2353         } else if (streq(key, "netns-socket-0")) {
2354                 int fd;
2355
2356                 r = exec_runtime_allocate(rt);
2357                 if (r < 0)
2358                         return r;
2359
2360                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2361                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2362                 else {
2363                         if ((*rt)->netns_storage_socket[0] >= 0)
2364                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2365
2366                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2367                 }
2368         } else if (streq(key, "netns-socket-1")) {
2369                 int fd;
2370
2371                 r = exec_runtime_allocate(rt);
2372                 if (r < 0)
2373                         return r;
2374
2375                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2376                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2377                 else {
2378                         if ((*rt)->netns_storage_socket[1] >= 0)
2379                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2380
2381                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2382                 }
2383         } else
2384                 return 0;
2385
2386         return 1;
2387 }
2388
/* Job callback: passes the path in p to rm_rf_dangerous() and then frees the
 * path string, which this function owns. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2395
2396 void exec_runtime_destroy(ExecRuntime *rt) {
2397         if (!rt)
2398                 return;
2399
2400         /* If there are multiple users of this, let's leave the stuff around */
2401         if (rt->n_ref > 1)
2402                 return;
2403
2404         if (rt->tmp_dir) {
2405                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2406                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2407                 rt->tmp_dir = NULL;
2408         }
2409
2410         if (rt->var_tmp_dir) {
2411                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2412                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2413                 rt->var_tmp_dir = NULL;
2414         }
2415
2416         close_pipe(rt->netns_storage_socket);
2417 }
2418
2419 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2420         [EXEC_INPUT_NULL] = "null",
2421         [EXEC_INPUT_TTY] = "tty",
2422         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2423         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2424         [EXEC_INPUT_SOCKET] = "socket"
2425 };
2426
2427 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2428
2429 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2430         [EXEC_OUTPUT_INHERIT] = "inherit",
2431         [EXEC_OUTPUT_NULL] = "null",
2432         [EXEC_OUTPUT_TTY] = "tty",
2433         [EXEC_OUTPUT_SYSLOG] = "syslog",
2434         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2435         [EXEC_OUTPUT_KMSG] = "kmsg",
2436         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2437         [EXEC_OUTPUT_JOURNAL] = "journal",
2438         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2439         [EXEC_OUTPUT_SOCKET] = "socket"
2440 };
2441
2442 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);