chiark / gitweb /
4b1177a7e5eb5eb198ecb333215c1c72c1223d08
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <libgen.h>
43 #undef basename
44
45 #ifdef HAVE_PAM
46 #include <security/pam_appl.h>
47 #endif
48
49 #ifdef HAVE_SELINUX
50 #include <selinux/selinux.h>
51 #endif
52
53 #ifdef HAVE_SECCOMP
54 #include <seccomp.h>
55 #endif
56
57 #include "execute.h"
58 #include "strv.h"
59 #include "macro.h"
60 #include "capability.h"
61 #include "util.h"
62 #include "log.h"
63 #include "sd-messages.h"
64 #include "ioprio.h"
65 #include "securebits.h"
66 #include "namespace.h"
67 #include "tcpwrap.h"
68 #include "exit-status.h"
69 #include "missing.h"
70 #include "utmp-wtmp.h"
71 #include "def.h"
72 #include "path-util.h"
73 #include "env-util.h"
74 #include "fileio.h"
75 #include "unit.h"
76 #include "async.h"
77 #include "selinux-util.h"
78 #include "errno-list.h"
79
80 #ifdef HAVE_SECCOMP
81 #include "seccomp-util.h"
82 #endif
83
/* Timeout for the first wait on the idle pipe in do_idle_pipe_dance() */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Timeout for the second wait in do_idle_pipe_dance(), after "x" was written to the pipe */
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() puts on the terminal. This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Size that connect_logger_as() passes to fd_inc_sndbuf() for the logger socket */
#define SNDBUF_SIZE (8*1024*1024)
91
/* Moves the passed file descriptors so that fds[i] ends up being fd number
 * i+3, i.e. they form a contiguous range right after stdin/stdout/stderr.
 *
 * The array is modified in place: every entry that had to be moved is
 * replaced by its duplicate and the old descriptor is closed.
 *
 * Returns 0 on success (also when n_fds is 0), or a negative errno if
 * fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot was still occupied, so we got a higher
                         * number. Remember the first index where that
                         * happened and have another pass from there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
135
/* Applies the requested O_NONBLOCK setting to each of the n_fds descriptors
 * and clears FD_CLOEXEC on all of them.
 *
 * Returns 0 on success (also when n_fds is 0), or the first negative value
 * returned by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * handed over to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
162
163 _pure_ static const char *tty_path(const ExecContext *context) {
164         assert(context);
165
166         if (context->tty_path)
167                 return context->tty_path;
168
169         return "/dev/console";
170 }
171
172 static void exec_context_tty_reset(const ExecContext *context) {
173         assert(context);
174
175         if (context->tty_vhangup)
176                 terminal_vhangup(tty_path(context));
177
178         if (context->tty_reset)
179                 reset_terminal(tty_path(context));
180
181         if (context->tty_vt_disallocate && context->tty_path)
182                 vt_disallocate(context->tty_path);
183 }
184
185 static bool is_terminal_output(ExecOutput o) {
186         return
187                 o == EXEC_OUTPUT_TTY ||
188                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
189                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
190                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
191 }
192
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes
 * it available as file descriptor nfd.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right number straight away */
        if (null_fd == nfd)
                return nfd;

        ret = nfd;
        if (dup2(null_fd, nfd) < 0)
                ret = -errno;

        close_nointr_nofail(null_fd);

        return ret;
}
210
211 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
212         int fd, r;
213         union sockaddr_union sa = {
214                 .un.sun_family = AF_UNIX,
215                 .un.sun_path = "/run/systemd/journal/stdout",
216         };
217
218         assert(context);
219         assert(output < _EXEC_OUTPUT_MAX);
220         assert(ident);
221         assert(nfd >= 0);
222
223         fd = socket(AF_UNIX, SOCK_STREAM, 0);
224         if (fd < 0)
225                 return -errno;
226
227         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
228         if (r < 0) {
229                 close_nointr_nofail(fd);
230                 return -errno;
231         }
232
233         if (shutdown(fd, SHUT_RD) < 0) {
234                 close_nointr_nofail(fd);
235                 return -errno;
236         }
237
238         fd_inc_sndbuf(fd, SNDBUF_SIZE);
239
240         dprintf(fd,
241                 "%s\n"
242                 "%s\n"
243                 "%i\n"
244                 "%i\n"
245                 "%i\n"
246                 "%i\n"
247                 "%i\n",
248                 context->syslog_identifier ? context->syslog_identifier : ident,
249                 unit_id,
250                 context->syslog_priority,
251                 !!context->syslog_level_prefix,
252                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
253                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
254                 is_terminal_output(output));
255
256         if (fd != nfd) {
257                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
258                 close_nointr_nofail(fd);
259         } else
260                 r = nfd;
261
262         return r;
263 }
/* Opens the terminal at path via open_terminal() (with O_NOCTTY added to
 * mode) and makes it available as file descriptor nfd.
 *
 * Returns nfd on success; on failure the negative value from
 * open_terminal() or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        ret = nfd;
        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;

        close_nointr_nofail(tty_fd);

        return ret;
}
281
282 static bool is_terminal_input(ExecInput i) {
283         return
284                 i == EXEC_INPUT_TTY ||
285                 i == EXEC_INPUT_TTY_FORCE ||
286                 i == EXEC_INPUT_TTY_FAIL;
287 }
288
289 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
290
291         if (is_terminal_input(std_input) && !apply_tty_stdin)
292                 return EXEC_INPUT_NULL;
293
294         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
295                 return EXEC_INPUT_NULL;
296
297         return std_input;
298 }
299
300 static int fixup_output(ExecOutput std_output, int socket_fd) {
301
302         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
303                 return EXEC_OUTPUT_INHERIT;
304
305         return std_output;
306 }
307
308 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
309         ExecInput i;
310
311         assert(context);
312
313         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
314
315         switch (i) {
316
317         case EXEC_INPUT_NULL:
318                 return open_null_as(O_RDONLY, STDIN_FILENO);
319
320         case EXEC_INPUT_TTY:
321         case EXEC_INPUT_TTY_FORCE:
322         case EXEC_INPUT_TTY_FAIL: {
323                 int fd, r;
324
325                 fd = acquire_terminal(tty_path(context),
326                                       i == EXEC_INPUT_TTY_FAIL,
327                                       i == EXEC_INPUT_TTY_FORCE,
328                                       false,
329                                       (usec_t) -1);
330                 if (fd < 0)
331                         return fd;
332
333                 if (fd != STDIN_FILENO) {
334                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
335                         close_nointr_nofail(fd);
336                 } else
337                         r = STDIN_FILENO;
338
339                 return r;
340         }
341
342         case EXEC_INPUT_SOCKET:
343                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
344
345         default:
346                 assert_not_reached("Unknown input type");
347         }
348 }
349
350 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
351         ExecOutput o;
352         ExecInput i;
353         int r;
354
355         assert(context);
356         assert(ident);
357
358         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
359         o = fixup_output(context->std_output, socket_fd);
360
361         if (fileno == STDERR_FILENO) {
362                 ExecOutput e;
363                 e = fixup_output(context->std_error, socket_fd);
364
365                 /* This expects the input and output are already set up */
366
367                 /* Don't change the stderr file descriptor if we inherit all
368                  * the way and are not on a tty */
369                 if (e == EXEC_OUTPUT_INHERIT &&
370                     o == EXEC_OUTPUT_INHERIT &&
371                     i == EXEC_INPUT_NULL &&
372                     !is_terminal_input(context->std_input) &&
373                     getppid () != 1)
374                         return fileno;
375
376                 /* Duplicate from stdout if possible */
377                 if (e == o || e == EXEC_OUTPUT_INHERIT)
378                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
379
380                 o = e;
381
382         } else if (o == EXEC_OUTPUT_INHERIT) {
383                 /* If input got downgraded, inherit the original value */
384                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
385                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
386
387                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
388                 if (i != EXEC_INPUT_NULL)
389                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
390
391                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
392                 if (getppid() != 1)
393                         return fileno;
394
395                 /* We need to open /dev/null here anew, to get the right access mode. */
396                 return open_null_as(O_WRONLY, fileno);
397         }
398
399         switch (o) {
400
401         case EXEC_OUTPUT_NULL:
402                 return open_null_as(O_WRONLY, fileno);
403
404         case EXEC_OUTPUT_TTY:
405                 if (is_terminal_input(i))
406                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
407
408                 /* We don't reset the terminal if this is just about output */
409                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_SYSLOG:
412         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
413         case EXEC_OUTPUT_KMSG:
414         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
415         case EXEC_OUTPUT_JOURNAL:
416         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
417                 r = connect_logger_as(context, o, ident, unit_id, fileno);
418                 if (r < 0) {
419                         log_struct_unit(LOG_CRIT, unit_id,
420                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
421                                 fileno == STDOUT_FILENO ? "out" : "err",
422                                 unit_id, strerror(-r),
423                                 "ERRNO=%d", -r,
424                                 NULL);
425                         r = open_null_as(O_WRONLY, fileno);
426                 }
427                 return r;
428
429         case EXEC_OUTPUT_SOCKET:
430                 assert(socket_fd >= 0);
431                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
432
433         default:
434                 assert_not_reached("Unknown error type");
435         }
436 }
437
438 static int chown_terminal(int fd, uid_t uid) {
439         struct stat st;
440
441         assert(fd >= 0);
442
443         /* This might fail. What matters are the results. */
444         (void) fchown(fd, uid, -1);
445         (void) fchmod(fd, TTY_MODE);
446
447         if (fstat(fd, &st) < 0)
448                 return -errno;
449
450         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
451                 return -EPERM;
452
453         return 0;
454 }
455
456 static int setup_confirm_stdio(int *_saved_stdin,
457                                int *_saved_stdout) {
458         int fd = -1, saved_stdin, saved_stdout = -1, r;
459
460         assert(_saved_stdin);
461         assert(_saved_stdout);
462
463         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
464         if (saved_stdin < 0)
465                 return -errno;
466
467         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
468         if (saved_stdout < 0) {
469                 r = errno;
470                 goto fail;
471         }
472
473         fd = acquire_terminal(
474                         "/dev/console",
475                         false,
476                         false,
477                         false,
478                         DEFAULT_CONFIRM_USEC);
479         if (fd < 0) {
480                 r = fd;
481                 goto fail;
482         }
483
484         r = chown_terminal(fd, getuid());
485         if (r < 0)
486                 goto fail;
487
488         if (dup2(fd, STDIN_FILENO) < 0) {
489                 r = -errno;
490                 goto fail;
491         }
492
493         if (dup2(fd, STDOUT_FILENO) < 0) {
494                 r = -errno;
495                 goto fail;
496         }
497
498         if (fd >= 2)
499                 close_nointr_nofail(fd);
500
501         *_saved_stdin = saved_stdin;
502         *_saved_stdout = saved_stdout;
503
504         return 0;
505
506 fail:
507         if (saved_stdout >= 0)
508                 close_nointr_nofail(saved_stdout);
509
510         if (saved_stdin >= 0)
511                 close_nointr_nofail(saved_stdin);
512
513         if (fd >= 0)
514                 close_nointr_nofail(fd);
515
516         return r;
517 }
518
519 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
520         int fd;
521         va_list ap;
522
523         assert(format);
524
525         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
526         if (fd < 0)
527                 return fd;
528
529         va_start(ap, format);
530         vdprintf(fd, format, ap);
531         va_end(ap);
532
533         close_nointr_nofail(fd);
534
535         return 0;
536 }
537
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies. Negative saved values are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. The saved copies are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return r;
}
564
/* Asks on the console whether the command line in argv shall be executed.
 *
 * The answer character (one of "yns") is stored in *response by ask().
 * Returns the result of ask(), or a negative errno-style value if the
 * console could not be set up or memory ran out. Once the console was
 * set up, stdin/stdout are restored on every exit path. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdio already points to the console here, so it has to
                 * be put back before bailing out */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
584
585 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
586         bool keep_groups = false;
587         int r;
588
589         assert(context);
590
591         /* Lookup and set GID and supplementary group list. Here too
592          * we avoid NSS lookups for gid=0. */
593
594         if (context->group || username) {
595
596                 if (context->group) {
597                         const char *g = context->group;
598
599                         if ((r = get_group_creds(&g, &gid)) < 0)
600                                 return r;
601                 }
602
603                 /* First step, initialize groups from /etc/groups */
604                 if (username && gid != 0) {
605                         if (initgroups(username, gid) < 0)
606                                 return -errno;
607
608                         keep_groups = true;
609                 }
610
611                 /* Second step, set our gids */
612                 if (setresgid(gid, gid, gid) < 0)
613                         return -errno;
614         }
615
616         if (context->supplementary_groups) {
617                 int ngroups_max, k;
618                 gid_t *gids;
619                 char **i;
620
621                 /* Final step, initialize any manually set supplementary groups */
622                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
623
624                 if (!(gids = new(gid_t, ngroups_max)))
625                         return -ENOMEM;
626
627                 if (keep_groups) {
628                         if ((k = getgroups(ngroups_max, gids)) < 0) {
629                                 free(gids);
630                                 return -errno;
631                         }
632                 } else
633                         k = 0;
634
635                 STRV_FOREACH(i, context->supplementary_groups) {
636                         const char *g;
637
638                         if (k >= ngroups_max) {
639                                 free(gids);
640                                 return -E2BIG;
641                         }
642
643                         g = *i;
644                         r = get_group_creds(&g, gids+k);
645                         if (r < 0) {
646                                 free(gids);
647                                 return r;
648                         }
649
650                         k++;
651                 }
652
653                 if (setgroups(k, gids) < 0) {
654                         free(gids);
655                         return -errno;
656                 }
657
658                 free(gids);
659         }
660
661         return 0;
662 }
663
664 static int enforce_user(const ExecContext *context, uid_t uid) {
665         assert(context);
666
667         /* Sets (but doesn't lookup) the uid and make sure we keep the
668          * capabilities while doing so. */
669
670         if (context->capabilities) {
671                 _cleanup_cap_free_ cap_t d = NULL;
672                 static const cap_value_t bits[] = {
673                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
674                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
675                 };
676
677                 /* First step: If we need to keep capabilities but
678                  * drop privileges we need to make sure we keep our
679                  * caps, while we drop privileges. */
680                 if (uid != 0) {
681                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
682
683                         if (prctl(PR_GET_SECUREBITS) != sb)
684                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685                                         return -errno;
686                 }
687
688                 /* Second step: set the capabilities. This will reduce
689                  * the capabilities to the minimum we need. */
690
691                 d = cap_dup(context->capabilities);
692                 if (!d)
693                         return -errno;
694
695                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
696                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
697                         return -errno;
698
699                 if (cap_set_proc(d) < 0)
700                         return -errno;
701         }
702
703         /* Third step: actually set the uids */
704         if (setresuid(uid, uid, uid) < 0)
705                 return -errno;
706
707         /* At this point we should have all necessary capabilities but
708            are otherwise a normal user. However, the caps might got
709            corrupted due to the setresuid() so we need clean them up
710            later. This is done outside of this call. */
711
712         return 0;
713 }
714
715 #ifdef HAVE_PAM
716
717 static int null_conv(
718                 int num_msg,
719                 const struct pam_message **msg,
720                 struct pam_response **resp,
721                 void *appdata_ptr) {
722
723         /* We don't support conversations */
724
725         return PAM_CONV_ERR;
726 }
727
728 static int setup_pam(
729                 const char *name,
730                 const char *user,
731                 uid_t uid,
732                 const char *tty,
733                 char ***pam_env,
734                 int fds[], unsigned n_fds) {
735
736         static const struct pam_conv conv = {
737                 .conv = null_conv,
738                 .appdata_ptr = NULL
739         };
740
741         pam_handle_t *handle = NULL;
742         sigset_t ss, old_ss;
743         int pam_code = PAM_SUCCESS;
744         int err;
745         char **e = NULL;
746         bool close_session = false;
747         pid_t pam_pid = 0, parent_pid;
748         int flags = 0;
749
750         assert(name);
751         assert(user);
752         assert(pam_env);
753
754         /* We set up PAM in the parent process, then fork. The child
755          * will then stay around until killed via PR_GET_PDEATHSIG or
756          * systemd via the cgroup logic. It will then remove the PAM
757          * session again. The parent process will exec() the actual
758          * daemon. We do things this way to ensure that the main PID
759          * of the daemon is the one we initially fork()ed. */
760
761         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
762                 flags |= PAM_SILENT;
763
764         pam_code = pam_start(name, user, &conv, &handle);
765         if (pam_code != PAM_SUCCESS) {
766                 handle = NULL;
767                 goto fail;
768         }
769
770         if (tty) {
771                 pam_code = pam_set_item(handle, PAM_TTY, tty);
772                 if (pam_code != PAM_SUCCESS)
773                         goto fail;
774         }
775
776         pam_code = pam_acct_mgmt(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         pam_code = pam_open_session(handle, flags);
781         if (pam_code != PAM_SUCCESS)
782                 goto fail;
783
784         close_session = true;
785
786         e = pam_getenvlist(handle);
787         if (!e) {
788                 pam_code = PAM_BUF_ERR;
789                 goto fail;
790         }
791
792         /* Block SIGTERM, so that we know that it won't get lost in
793          * the child */
794         if (sigemptyset(&ss) < 0 ||
795             sigaddset(&ss, SIGTERM) < 0 ||
796             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
797                 goto fail;
798
799         parent_pid = getpid();
800
801         pam_pid = fork();
802         if (pam_pid < 0)
803                 goto fail;
804
805         if (pam_pid == 0) {
806                 int sig;
807                 int r = EXIT_PAM;
808
809                 /* The child's job is to reset the PAM session on
810                  * termination */
811
812                 /* This string must fit in 10 chars (i.e. the length
813                  * of "/sbin/init"), to look pretty in /bin/ps */
814                 rename_process("(sd-pam)");
815
816                 /* Make sure we don't keep open the passed fds in this
817                 child. We assume that otherwise only those fds are
818                 open here that have been opened by PAM. */
819                 close_many(fds, n_fds);
820
821                 /* Drop privileges - we don't need any to pam_close_session
822                  * and this will make PR_SET_PDEATHSIG work in most cases.
823                  * If this fails, ignore the error - but expect sd-pam threads
824                  * to fail to exit normally */
825                 if (setresuid(uid, uid, uid) < 0)
826                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
827
828                 /* Wait until our parent died. This will only work if
829                  * the above setresuid() succeeds, otherwise the kernel
830                  * will not allow unprivileged parents kill their privileged
831                  * children this way. We rely on the control groups kill logic
832                  * to do the rest for us. */
833                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
834                         goto child_finish;
835
836                 /* Check if our parent process might already have
837                  * died? */
838                 if (getppid() == parent_pid) {
839                         for (;;) {
840                                 if (sigwait(&ss, &sig) < 0) {
841                                         if (errno == EINTR)
842                                                 continue;
843
844                                         goto child_finish;
845                                 }
846
847                                 assert(sig == SIGTERM);
848                                 break;
849                         }
850                 }
851
852                 /* If our parent died we'll end the session */
853                 if (getppid() != parent_pid) {
854                         pam_code = pam_close_session(handle, flags);
855                         if (pam_code != PAM_SUCCESS)
856                                 goto child_finish;
857                 }
858
859                 r = 0;
860
861         child_finish:
862                 pam_end(handle, pam_code | flags);
863                 _exit(r);
864         }
865
866         /* If the child was forked off successfully it will do all the
867          * cleanups, so forget about the handle here. */
868         handle = NULL;
869
870         /* Unblock SIGTERM again in the parent */
871         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
872                 goto fail;
873
874         /* We close the log explicitly here, since the PAM modules
875          * might have opened it, but we don't want this fd around. */
876         closelog();
877
878         *pam_env = e;
879         e = NULL;
880
881         return 0;
882
883 fail:
884         if (pam_code != PAM_SUCCESS) {
885                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
886                 err = -EPERM;  /* PAM errors do not map to errno */
887         } else {
888                 log_error("PAM failed: %m");
889                 err = -errno;
890         }
891
892         if (handle) {
893                 if (close_session)
894                         pam_code = pam_close_session(handle, flags);
895
896                 pam_end(handle, pam_code | flags);
897         }
898
899         strv_free(e);
900
901         closelog();
902
903         if (pam_pid > 1) {
904                 kill(pam_pid, SIGTERM);
905                 kill(pam_pid, SIGCONT);
906         }
907
908         return err;
909 }
910 #endif
911
/* Renames the process to the basename of path wrapped in parentheses,
 * e.g. "(foobar)". Only the last 8 characters of a longer basename are
 * used; an empty basename yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps: two parentheses
         * plus at most 8 characters of the name */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the beginning is often just a common
                 * prefix such as "systemd-" */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
942
943 #ifdef HAVE_SECCOMP
944
945 static int apply_seccomp(ExecContext *c) {
946         uint32_t negative_action, action;
947         scmp_filter_ctx *seccomp;
948         Iterator i;
949         void *id;
950         int r;
951
952         assert(c);
953
954         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
955
956         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
957         if (!seccomp)
958                 return -ENOMEM;
959
960         if (c->syscall_archs) {
961
962                 SET_FOREACH(id, c->syscall_archs, i) {
963                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
964                         if (r == -EEXIST)
965                                 continue;
966                         if (r < 0) {
967                                 seccomp_release(seccomp);
968                                 return r;
969                         }
970                 }
971         } else {
972
973                 r = seccomp_add_secondary_archs(seccomp);
974                 if (r < 0) {
975                         seccomp_release(seccomp);
976                         return r;
977                 }
978         }
979
980         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
981         SET_FOREACH(id, c->syscall_filter, i) {
982                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
983                 if (r < 0) {
984                         seccomp_release(seccomp);
985                         return r;
986                 }
987         }
988
989         r = seccomp_load(seccomp);
990         seccomp_release(seccomp);
991
992         return r;
993 }
994 #endif
995
996 static void do_idle_pipe_dance(int idle_pipe[4]) {
997         assert(idle_pipe);
998
999         if (idle_pipe[1] >= 0)
1000                 close_nointr_nofail(idle_pipe[1]);
1001         if (idle_pipe[2] >= 0)
1002                 close_nointr_nofail(idle_pipe[2]);
1003
1004         if (idle_pipe[0] >= 0) {
1005                 int r;
1006
1007                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1008
1009                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1010                         /* Signal systemd that we are bored and want to continue. */
1011                         write(idle_pipe[3], "x", 1);
1012
1013                         /* Wait for systemd to react to the signal above. */
1014                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1015                 }
1016
1017                 close_nointr_nofail(idle_pipe[0]);
1018
1019         }
1020
1021         if (idle_pipe[3] >= 0)
1022                 close_nointr_nofail(idle_pipe[3]);
1023 }
1024
1025 static int build_environment(
1026                 ExecContext *c,
1027                 unsigned n_fds,
1028                 usec_t watchdog_usec,
1029                 const char *home,
1030                 const char *username,
1031                 const char *shell,
1032                 char ***ret) {
1033
1034         _cleanup_strv_free_ char **our_env = NULL;
1035         unsigned n_env = 0;
1036         char *x;
1037
1038         assert(c);
1039         assert(ret);
1040
1041         our_env = new0(char*, 10);
1042         if (!our_env)
1043                 return -ENOMEM;
1044
1045         if (n_fds > 0) {
1046                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1047                         return -ENOMEM;
1048                 our_env[n_env++] = x;
1049
1050                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1051                         return -ENOMEM;
1052                 our_env[n_env++] = x;
1053         }
1054
1055         if (watchdog_usec > 0) {
1056                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1057                         return -ENOMEM;
1058                 our_env[n_env++] = x;
1059
1060                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1061                         return -ENOMEM;
1062                 our_env[n_env++] = x;
1063         }
1064
1065         if (home) {
1066                 x = strappend("HOME=", home);
1067                 if (!x)
1068                         return -ENOMEM;
1069                 our_env[n_env++] = x;
1070         }
1071
1072         if (username) {
1073                 x = strappend("LOGNAME=", username);
1074                 if (!x)
1075                         return -ENOMEM;
1076                 our_env[n_env++] = x;
1077
1078                 x = strappend("USER=", username);
1079                 if (!x)
1080                         return -ENOMEM;
1081                 our_env[n_env++] = x;
1082         }
1083
1084         if (shell) {
1085                 x = strappend("SHELL=", shell);
1086                 if (!x)
1087                         return -ENOMEM;
1088                 our_env[n_env++] = x;
1089         }
1090
1091         if (is_terminal_input(c->std_input) ||
1092             c->std_output == EXEC_OUTPUT_TTY ||
1093             c->std_error == EXEC_OUTPUT_TTY ||
1094             c->tty_path) {
1095
1096                 x = strdup(default_term_for_tty(tty_path(c)));
1097                 if (!x)
1098                         return -ENOMEM;
1099                 our_env[n_env++] = x;
1100         }
1101
1102         our_env[n_env++] = NULL;
1103         assert(n_env <= 10);
1104
1105         *ret = our_env;
1106         our_env = NULL;
1107
1108         return 0;
1109 }
1110
1111 int exec_spawn(ExecCommand *command,
1112                char **argv,
1113                ExecContext *context,
1114                int fds[], unsigned n_fds,
1115                char **environment,
1116                bool apply_permissions,
1117                bool apply_chroot,
1118                bool apply_tty_stdin,
1119                bool confirm_spawn,
1120                CGroupControllerMask cgroup_supported,
1121                const char *cgroup_path,
1122                const char *unit_id,
1123                usec_t watchdog_usec,
1124                int idle_pipe[4],
1125                ExecRuntime *runtime,
1126                pid_t *ret) {
1127
1128         _cleanup_strv_free_ char **files_env = NULL;
1129         int socket_fd;
1130         char *line;
1131         pid_t pid;
1132         int r;
1133
1134         assert(command);
1135         assert(context);
1136         assert(ret);
1137         assert(fds || n_fds <= 0);
1138
1139         if (context->std_input == EXEC_INPUT_SOCKET ||
1140             context->std_output == EXEC_OUTPUT_SOCKET ||
1141             context->std_error == EXEC_OUTPUT_SOCKET) {
1142
1143                 if (n_fds != 1)
1144                         return -EINVAL;
1145
1146                 socket_fd = fds[0];
1147
1148                 fds = NULL;
1149                 n_fds = 0;
1150         } else
1151                 socket_fd = -1;
1152
1153         r = exec_context_load_environment(context, &files_env);
1154         if (r < 0) {
1155                 log_struct_unit(LOG_ERR,
1156                            unit_id,
1157                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1158                            "ERRNO=%d", -r,
1159                            NULL);
1160                 return r;
1161         }
1162
1163         if (!argv)
1164                 argv = command->argv;
1165
1166         line = exec_command_line(argv);
1167         if (!line)
1168                 return log_oom();
1169
1170         log_struct_unit(LOG_DEBUG,
1171                         unit_id,
1172                         "EXECUTABLE=%s", command->path,
1173                         "MESSAGE=About to execute: %s", line,
1174                         NULL);
1175         free(line);
1176
1177         pid = fork();
1178         if (pid < 0)
1179                 return -errno;
1180
1181         if (pid == 0) {
1182                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1183                 const char *username = NULL, *home = NULL, *shell = NULL;
1184                 unsigned n_dont_close = 0;
1185                 int dont_close[n_fds + 3];
1186                 uid_t uid = (uid_t) -1;
1187                 gid_t gid = (gid_t) -1;
1188                 sigset_t ss;
1189                 int i, err;
1190
1191                 /* child */
1192
1193                 rename_process_from_path(command->path);
1194
1195                 /* We reset exactly these signals, since they are the
1196                  * only ones we set to SIG_IGN in the main daemon. All
1197                  * others we leave untouched because we set them to
1198                  * SIG_DFL or a valid handler initially, both of which
1199                  * will be demoted to SIG_DFL. */
1200                 default_signals(SIGNALS_CRASH_HANDLER,
1201                                 SIGNALS_IGNORE, -1);
1202
1203                 if (context->ignore_sigpipe)
1204                         ignore_signals(SIGPIPE, -1);
1205
1206                 assert_se(sigemptyset(&ss) == 0);
1207                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1208                         err = -errno;
1209                         r = EXIT_SIGNAL_MASK;
1210                         goto fail_child;
1211                 }
1212
1213                 if (idle_pipe)
1214                         do_idle_pipe_dance(idle_pipe);
1215
1216                 /* Close sockets very early to make sure we don't
1217                  * block init reexecution because it cannot bind its
1218                  * sockets */
1219                 log_forget_fds();
1220
1221                 if (socket_fd >= 0)
1222                         dont_close[n_dont_close++] = socket_fd;
1223                 if (n_fds > 0) {
1224                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1225                         n_dont_close += n_fds;
1226                 }
1227                 if (runtime) {
1228                         if (runtime->netns_storage_socket[0] >= 0)
1229                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1230                         if (runtime->netns_storage_socket[1] >= 0)
1231                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1232                 }
1233
1234                 err = close_all_fds(dont_close, n_dont_close);
1235                 if (err < 0) {
1236                         r = EXIT_FDS;
1237                         goto fail_child;
1238                 }
1239
1240                 if (!context->same_pgrp)
1241                         if (setsid() < 0) {
1242                                 err = -errno;
1243                                 r = EXIT_SETSID;
1244                                 goto fail_child;
1245                         }
1246
1247                 if (context->tcpwrap_name) {
1248                         if (socket_fd >= 0)
1249                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1250                                         err = -EACCES;
1251                                         r = EXIT_TCPWRAP;
1252                                         goto fail_child;
1253                                 }
1254
1255                         for (i = 0; i < (int) n_fds; i++) {
1256                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1257                                         err = -EACCES;
1258                                         r = EXIT_TCPWRAP;
1259                                         goto fail_child;
1260                                 }
1261                         }
1262                 }
1263
1264                 exec_context_tty_reset(context);
1265
1266                 if (confirm_spawn) {
1267                         char response;
1268
1269                         err = ask_for_confirmation(&response, argv);
1270                         if (err == -ETIMEDOUT)
1271                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1272                         else if (err < 0)
1273                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1274                         else if (response == 's') {
1275                                 write_confirm_message("Skipping execution.\n");
1276                                 err = -ECANCELED;
1277                                 r = EXIT_CONFIRM;
1278                                 goto fail_child;
1279                         } else if (response == 'n') {
1280                                 write_confirm_message("Failing execution.\n");
1281                                 err = r = 0;
1282                                 goto fail_child;
1283                         }
1284                 }
1285
1286                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1287                  * must sure to drop O_NONBLOCK */
1288                 if (socket_fd >= 0)
1289                         fd_nonblock(socket_fd, false);
1290
1291                 err = setup_input(context, socket_fd, apply_tty_stdin);
1292                 if (err < 0) {
1293                         r = EXIT_STDIN;
1294                         goto fail_child;
1295                 }
1296
1297                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1298                 if (err < 0) {
1299                         r = EXIT_STDOUT;
1300                         goto fail_child;
1301                 }
1302
1303                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1304                 if (err < 0) {
1305                         r = EXIT_STDERR;
1306                         goto fail_child;
1307                 }
1308
1309                 if (cgroup_path) {
1310                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1311                         if (err < 0) {
1312                                 r = EXIT_CGROUP;
1313                                 goto fail_child;
1314                         }
1315                 }
1316
1317                 if (context->oom_score_adjust_set) {
1318                         char t[16];
1319
1320                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1321                         char_array_0(t);
1322
1323                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1324                                 err = -errno;
1325                                 r = EXIT_OOM_ADJUST;
1326                                 goto fail_child;
1327                         }
1328                 }
1329
1330                 if (context->nice_set)
1331                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1332                                 err = -errno;
1333                                 r = EXIT_NICE;
1334                                 goto fail_child;
1335                         }
1336
1337                 if (context->cpu_sched_set) {
1338                         struct sched_param param = {
1339                                 .sched_priority = context->cpu_sched_priority,
1340                         };
1341
1342                         r = sched_setscheduler(0,
1343                                                context->cpu_sched_policy |
1344                                                (context->cpu_sched_reset_on_fork ?
1345                                                 SCHED_RESET_ON_FORK : 0),
1346                                                &param);
1347                         if (r < 0) {
1348                                 err = -errno;
1349                                 r = EXIT_SETSCHEDULER;
1350                                 goto fail_child;
1351                         }
1352                 }
1353
1354                 if (context->cpuset)
1355                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1356                                 err = -errno;
1357                                 r = EXIT_CPUAFFINITY;
1358                                 goto fail_child;
1359                         }
1360
1361                 if (context->ioprio_set)
1362                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1363                                 err = -errno;
1364                                 r = EXIT_IOPRIO;
1365                                 goto fail_child;
1366                         }
1367
1368                 if (context->timer_slack_nsec != (nsec_t) -1)
1369                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1370                                 err = -errno;
1371                                 r = EXIT_TIMERSLACK;
1372                                 goto fail_child;
1373                         }
1374
1375                 if (context->utmp_id)
1376                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1377
1378                 if (context->user) {
1379                         username = context->user;
1380                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1381                         if (err < 0) {
1382                                 r = EXIT_USER;
1383                                 goto fail_child;
1384                         }
1385
1386                         if (is_terminal_input(context->std_input)) {
1387                                 err = chown_terminal(STDIN_FILENO, uid);
1388                                 if (err < 0) {
1389                                         r = EXIT_STDIN;
1390                                         goto fail_child;
1391                                 }
1392                         }
1393                 }
1394
1395 #ifdef HAVE_PAM
1396                 if (cgroup_path && context->user && context->pam_name) {
1397                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1398                         if (err < 0) {
1399                                 r = EXIT_CGROUP;
1400                                 goto fail_child;
1401                         }
1402
1403
1404                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1405                         if (err < 0) {
1406                                 r = EXIT_CGROUP;
1407                                 goto fail_child;
1408                         }
1409                 }
1410 #endif
1411
1412                 if (apply_permissions) {
1413                         err = enforce_groups(context, username, gid);
1414                         if (err < 0) {
1415                                 r = EXIT_GROUP;
1416                                 goto fail_child;
1417                         }
1418                 }
1419
1420                 umask(context->umask);
1421
1422 #ifdef HAVE_PAM
1423                 if (apply_permissions && context->pam_name && username) {
1424                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1425                         if (err < 0) {
1426                                 r = EXIT_PAM;
1427                                 goto fail_child;
1428                         }
1429                 }
1430 #endif
1431                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1432                         err = setup_netns(runtime->netns_storage_socket);
1433                         if (err < 0) {
1434                                 r = EXIT_NETWORK;
1435                                 goto fail_child;
1436                         }
1437                 }
1438
1439                 if (!strv_isempty(context->read_write_dirs) ||
1440                     !strv_isempty(context->read_only_dirs) ||
1441                     !strv_isempty(context->inaccessible_dirs) ||
1442                     context->mount_flags != 0 ||
1443                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1444                     context->private_devices) {
1445
1446                         char *tmp = NULL, *var = NULL;
1447
1448                         /* The runtime struct only contains the parent
1449                          * of the private /tmp, which is
1450                          * non-accessible to world users. Inside of it
1451                          * there's a /tmp that is sticky, and that's
1452                          * the one we want to use here. */
1453
1454                         if (context->private_tmp && runtime) {
1455                                 if (runtime->tmp_dir)
1456                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1457                                 if (runtime->var_tmp_dir)
1458                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1459                         }
1460
1461                         err = setup_namespace(
1462                                         context->read_write_dirs,
1463                                         context->read_only_dirs,
1464                                         context->inaccessible_dirs,
1465                                         tmp,
1466                                         var,
1467                                         context->private_devices,
1468                                         context->mount_flags);
1469
1470                         if (err < 0) {
1471                                 r = EXIT_NAMESPACE;
1472                                 goto fail_child;
1473                         }
1474                 }
1475
1476                 if (apply_chroot) {
1477                         if (context->root_directory)
1478                                 if (chroot(context->root_directory) < 0) {
1479                                         err = -errno;
1480                                         r = EXIT_CHROOT;
1481                                         goto fail_child;
1482                                 }
1483
1484                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1485                                 err = -errno;
1486                                 r = EXIT_CHDIR;
1487                                 goto fail_child;
1488                         }
1489                 } else {
1490                         _cleanup_free_ char *d = NULL;
1491
1492                         if (asprintf(&d, "%s/%s",
1493                                      context->root_directory ? context->root_directory : "",
1494                                      context->working_directory ? context->working_directory : "") < 0) {
1495                                 err = -ENOMEM;
1496                                 r = EXIT_MEMORY;
1497                                 goto fail_child;
1498                         }
1499
1500                         if (chdir(d) < 0) {
1501                                 err = -errno;
1502                                 r = EXIT_CHDIR;
1503                                 goto fail_child;
1504                         }
1505                 }
1506
1507                 /* We repeat the fd closing here, to make sure that
1508                  * nothing is leaked from the PAM modules */
1509                 err = close_all_fds(fds, n_fds);
1510                 if (err >= 0)
1511                         err = shift_fds(fds, n_fds);
1512                 if (err >= 0)
1513                         err = flags_fds(fds, n_fds, context->non_blocking);
1514                 if (err < 0) {
1515                         r = EXIT_FDS;
1516                         goto fail_child;
1517                 }
1518
1519                 if (apply_permissions) {
1520
1521                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1522                                 if (!context->rlimit[i])
1523                                         continue;
1524
1525                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1526                                         err = -errno;
1527                                         r = EXIT_LIMITS;
1528                                         goto fail_child;
1529                                 }
1530                         }
1531
1532                         if (context->capability_bounding_set_drop) {
1533                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1534                                 if (err < 0) {
1535                                         r = EXIT_CAPABILITIES;
1536                                         goto fail_child;
1537                                 }
1538                         }
1539
1540                         if (context->user) {
1541                                 err = enforce_user(context, uid);
1542                                 if (err < 0) {
1543                                         r = EXIT_USER;
1544                                         goto fail_child;
1545                                 }
1546                         }
1547
1548                         /* PR_GET_SECUREBITS is not privileged, while
1549                          * PR_SET_SECUREBITS is. So to suppress
1550                          * potential EPERMs we'll try not to call
1551                          * PR_SET_SECUREBITS unless necessary. */
1552                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1553                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1554                                         err = -errno;
1555                                         r = EXIT_SECUREBITS;
1556                                         goto fail_child;
1557                                 }
1558
1559                         if (context->capabilities)
1560                                 if (cap_set_proc(context->capabilities) < 0) {
1561                                         err = -errno;
1562                                         r = EXIT_CAPABILITIES;
1563                                         goto fail_child;
1564                                 }
1565
1566                         if (context->no_new_privileges)
1567                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1568                                         err = -errno;
1569                                         r = EXIT_NO_NEW_PRIVILEGES;
1570                                         goto fail_child;
1571                                 }
1572
1573 #ifdef HAVE_SECCOMP
1574                         if (context->syscall_filter || context->syscall_archs) {
1575                                 err = apply_seccomp(context);
1576                                 if (err < 0) {
1577                                         r = EXIT_SECCOMP;
1578                                         goto fail_child;
1579                                 }
1580                         }
1581 #endif
1582
1583 #ifdef HAVE_SELINUX
1584                         if (context->selinux_context && use_selinux()) {
1585                                 err = setexeccon(context->selinux_context);
1586                                 if (err < 0 && !context->selinux_context_ignore) {
1587                                         r = EXIT_SELINUX_CONTEXT;
1588                                         goto fail_child;
1589                                 }
1590                         }
1591 #endif
1592                 }
1593
1594                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1595                 if (r < 0) {
1596                         r = EXIT_MEMORY;
1597                         goto fail_child;
1598                 }
1599
1600                 final_env = strv_env_merge(5,
1601                                            environment,
1602                                            our_env,
1603                                            context->environment,
1604                                            files_env,
1605                                            pam_env,
1606                                            NULL);
1607                 if (!final_env) {
1608                         err = -ENOMEM;
1609                         r = EXIT_MEMORY;
1610                         goto fail_child;
1611                 }
1612
1613                 final_argv = replace_env_argv(argv, final_env);
1614                 if (!final_argv) {
1615                         err = -ENOMEM;
1616                         r = EXIT_MEMORY;
1617                         goto fail_child;
1618                 }
1619
1620                 final_env = strv_env_clean(final_env);
1621
1622                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1623                         line = exec_command_line(final_argv);
1624                         if (line) {
1625                                 log_open();
1626                                 log_struct_unit(LOG_DEBUG,
1627                                                 unit_id,
1628                                                 "EXECUTABLE=%s", command->path,
1629                                                 "MESSAGE=Executing: %s", line,
1630                                                 NULL);
1631                                 log_close();
1632                                 free(line);
1633                                 line = NULL;
1634                         }
1635                 }
1636                 execve(command->path, final_argv, final_env);
1637                 err = -errno;
1638                 r = EXIT_EXEC;
1639
1640         fail_child:
1641                 if (r != 0) {
1642                         log_open();
1643                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1644                                    "EXECUTABLE=%s", command->path,
1645                                    "MESSAGE=Failed at step %s spawning %s: %s",
1646                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1647                                           command->path, strerror(-err),
1648                                    "ERRNO=%d", -err,
1649                                    NULL);
1650                         log_close();
1651                 }
1652
1653                 _exit(r);
1654         }
1655
1656         log_struct_unit(LOG_DEBUG,
1657                         unit_id,
1658                         "MESSAGE=Forked %s as "PID_FMT,
1659                         command->path, pid,
1660                         NULL);
1661
1662         /* We add the new process to the cgroup both in the child (so
1663          * that we can be sure that no user code is ever executed
1664          * outside of the cgroup) and in the parent (so that we can be
1665          * sure that when we kill the cgroup the process will be
1666          * killed too). */
1667         if (cgroup_path)
1668                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1669
1670         exec_status_start(&command->exec_status, pid);
1671
1672         *ret = pid;
1673         return 0;
1674 }
1675
1676 void exec_context_init(ExecContext *c) {
1677         assert(c);
1678
1679         c->umask = 0022;
1680         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1681         c->cpu_sched_policy = SCHED_OTHER;
1682         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1683         c->syslog_level_prefix = true;
1684         c->ignore_sigpipe = true;
1685         c->timer_slack_nsec = (nsec_t) -1;
1686 }
1687
1688 void exec_context_done(ExecContext *c) {
1689         unsigned l;
1690
1691         assert(c);
1692
1693         strv_free(c->environment);
1694         c->environment = NULL;
1695
1696         strv_free(c->environment_files);
1697         c->environment_files = NULL;
1698
1699         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1700                 free(c->rlimit[l]);
1701                 c->rlimit[l] = NULL;
1702         }
1703
1704         free(c->working_directory);
1705         c->working_directory = NULL;
1706         free(c->root_directory);
1707         c->root_directory = NULL;
1708
1709         free(c->tty_path);
1710         c->tty_path = NULL;
1711
1712         free(c->tcpwrap_name);
1713         c->tcpwrap_name = NULL;
1714
1715         free(c->syslog_identifier);
1716         c->syslog_identifier = NULL;
1717
1718         free(c->user);
1719         c->user = NULL;
1720
1721         free(c->group);
1722         c->group = NULL;
1723
1724         strv_free(c->supplementary_groups);
1725         c->supplementary_groups = NULL;
1726
1727         free(c->pam_name);
1728         c->pam_name = NULL;
1729
1730         if (c->capabilities) {
1731                 cap_free(c->capabilities);
1732                 c->capabilities = NULL;
1733         }
1734
1735         strv_free(c->read_only_dirs);
1736         c->read_only_dirs = NULL;
1737
1738         strv_free(c->read_write_dirs);
1739         c->read_write_dirs = NULL;
1740
1741         strv_free(c->inaccessible_dirs);
1742         c->inaccessible_dirs = NULL;
1743
1744         if (c->cpuset)
1745                 CPU_FREE(c->cpuset);
1746
1747         free(c->utmp_id);
1748         c->utmp_id = NULL;
1749
1750         free(c->selinux_context);
1751         c->selinux_context = NULL;
1752
1753 #ifdef HAVE_SECCOMP
1754         set_free(c->syscall_filter);
1755         c->syscall_filter = NULL;
1756
1757         set_free(c->syscall_archs);
1758         c->syscall_archs = NULL;
1759 #endif
1760 }
1761
1762 void exec_command_done(ExecCommand *c) {
1763         assert(c);
1764
1765         free(c->path);
1766         c->path = NULL;
1767
1768         strv_free(c->argv);
1769         c->argv = NULL;
1770 }
1771
1772 void exec_command_done_array(ExecCommand *c, unsigned n) {
1773         unsigned i;
1774
1775         for (i = 0; i < n; i++)
1776                 exec_command_done(c+i);
1777 }
1778
1779 void exec_command_free_list(ExecCommand *c) {
1780         ExecCommand *i;
1781
1782         while ((i = c)) {
1783                 LIST_REMOVE(command, c, i);
1784                 exec_command_done(i);
1785                 free(i);
1786         }
1787 }
1788
1789 void exec_command_free_array(ExecCommand **c, unsigned n) {
1790         unsigned i;
1791
1792         for (i = 0; i < n; i++) {
1793                 exec_command_free_list(c[i]);
1794                 c[i] = NULL;
1795         }
1796 }
1797
1798 int exec_context_load_environment(const ExecContext *c, char ***l) {
1799         char **i, **r = NULL;
1800
1801         assert(c);
1802         assert(l);
1803
1804         STRV_FOREACH(i, c->environment_files) {
1805                 char *fn;
1806                 int k;
1807                 bool ignore = false;
1808                 char **p;
1809                 _cleanup_globfree_ glob_t pglob = {};
1810                 int count, n;
1811
1812                 fn = *i;
1813
1814                 if (fn[0] == '-') {
1815                         ignore = true;
1816                         fn ++;
1817                 }
1818
1819                 if (!path_is_absolute(fn)) {
1820                         if (ignore)
1821                                 continue;
1822
1823                         strv_free(r);
1824                         return -EINVAL;
1825                 }
1826
1827                 /* Filename supports globbing, take all matching files */
1828                 errno = 0;
1829                 if (glob(fn, 0, NULL, &pglob) != 0) {
1830                         if (ignore)
1831                                 continue;
1832
1833                         strv_free(r);
1834                         return errno ? -errno : -EINVAL;
1835                 }
1836                 count = pglob.gl_pathc;
1837                 if (count == 0) {
1838                         if (ignore)
1839                                 continue;
1840
1841                         strv_free(r);
1842                         return -EINVAL;
1843                 }
1844                 for (n = 0; n < count; n++) {
1845                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1846                         if (k < 0) {
1847                                 if (ignore)
1848                                         continue;
1849
1850                                 strv_free(r);
1851                                 return k;
1852                         }
1853                         /* Log invalid environment variables with filename */
1854                         if (p)
1855                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1856
1857                         if (r == NULL)
1858                                 r = p;
1859                         else {
1860                                 char **m;
1861
1862                                 m = strv_env_merge(2, r, p);
1863                                 strv_free(r);
1864                                 strv_free(p);
1865                                 if (!m)
1866                                         return -ENOMEM;
1867
1868                                 r = m;
1869                         }
1870                 }
1871         }
1872
1873         *l = r;
1874
1875         return 0;
1876 }
1877
/* Returns true if the specified TTY (with or without a "/dev/" prefix)
 * could be the device /dev/console currently refers to. If the console
 * cannot be resolved we err on the safe side and return true. */
static bool tty_may_match_dev_console(const char *tty) {
        const char *name;
        char *active = NULL, *console;
        bool match;

        /* Strip the "/dev/" prefix, if there is one */
        name = startswith(tty, "/dev/") ? tty + 5 : tty;

        /* The console trivially matches itself */
        if (streq(name, "console"))
                return true;

        console = resolve_dev_console(&active);
        if (!console)
                /* Resolution failed, so a match cannot be ruled out */
                return true;

        if (streq(console, name))
                match = true;
        else
                /* "tty0" refers to whichever VC is active at the
                 * moment, hence any VC may be meant by it */
                match = streq(console, "tty0") && tty_is_vc(name);

        /* NOTE(review): only "active" is released here; "console"
         * presumably points into that buffer. Confirm against
         * resolve_dev_console(). */
        free(active);

        return match;
}
1900
1901 bool exec_context_may_touch_console(ExecContext *ec) {
1902         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1903                 is_terminal_input(ec->std_input) ||
1904                 is_terminal_output(ec->std_output) ||
1905                 is_terminal_output(ec->std_error)) &&
1906                tty_may_match_dev_console(tty_path(ec));
1907 }
1908
1909 static void strv_fprintf(FILE *f, char **l) {
1910         char **g;
1911
1912         assert(f);
1913
1914         STRV_FOREACH(g, l)
1915                 fprintf(f, " %s", *g);
1916 }
1917
1918 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1919         char **e;
1920         unsigned i;
1921
1922         assert(c);
1923         assert(f);
1924
1925         prefix = strempty(prefix);
1926
1927         fprintf(f,
1928                 "%sUMask: %04o\n"
1929                 "%sWorkingDirectory: %s\n"
1930                 "%sRootDirectory: %s\n"
1931                 "%sNonBlocking: %s\n"
1932                 "%sPrivateTmp: %s\n"
1933                 "%sPrivateNetwork: %s\n"
1934                 "%sPrivateDevices: %s\n"
1935                 "%sIgnoreSIGPIPE: %s\n",
1936                 prefix, c->umask,
1937                 prefix, c->working_directory ? c->working_directory : "/",
1938                 prefix, c->root_directory ? c->root_directory : "/",
1939                 prefix, yes_no(c->non_blocking),
1940                 prefix, yes_no(c->private_tmp),
1941                 prefix, yes_no(c->private_network),
1942                 prefix, yes_no(c->private_devices),
1943                 prefix, yes_no(c->ignore_sigpipe));
1944
1945         STRV_FOREACH(e, c->environment)
1946                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1947
1948         STRV_FOREACH(e, c->environment_files)
1949                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1950
1951         if (c->tcpwrap_name)
1952                 fprintf(f,
1953                         "%sTCPWrapName: %s\n",
1954                         prefix, c->tcpwrap_name);
1955
1956         if (c->nice_set)
1957                 fprintf(f,
1958                         "%sNice: %i\n",
1959                         prefix, c->nice);
1960
1961         if (c->oom_score_adjust_set)
1962                 fprintf(f,
1963                         "%sOOMScoreAdjust: %i\n",
1964                         prefix, c->oom_score_adjust);
1965
1966         for (i = 0; i < RLIM_NLIMITS; i++)
1967                 if (c->rlimit[i])
1968                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1969
1970         if (c->ioprio_set) {
1971                 char *class_str;
1972                 int r;
1973
1974                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1975                 if (r < 0)
1976                         class_str = NULL;
1977                 fprintf(f,
1978                         "%sIOSchedulingClass: %s\n"
1979                         "%sIOPriority: %i\n",
1980                         prefix, strna(class_str),
1981                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1982                 free(class_str);
1983         }
1984
1985         if (c->cpu_sched_set) {
1986                 char *policy_str;
1987                 int r;
1988
1989                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1990                 if (r < 0)
1991                         policy_str = NULL;
1992                 fprintf(f,
1993                         "%sCPUSchedulingPolicy: %s\n"
1994                         "%sCPUSchedulingPriority: %i\n"
1995                         "%sCPUSchedulingResetOnFork: %s\n",
1996                         prefix, strna(policy_str),
1997                         prefix, c->cpu_sched_priority,
1998                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1999                 free(policy_str);
2000         }
2001
2002         if (c->cpuset) {
2003                 fprintf(f, "%sCPUAffinity:", prefix);
2004                 for (i = 0; i < c->cpuset_ncpus; i++)
2005                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2006                                 fprintf(f, " %u", i);
2007                 fputs("\n", f);
2008         }
2009
2010         if (c->timer_slack_nsec != (nsec_t) -1)
2011                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2012
2013         fprintf(f,
2014                 "%sStandardInput: %s\n"
2015                 "%sStandardOutput: %s\n"
2016                 "%sStandardError: %s\n",
2017                 prefix, exec_input_to_string(c->std_input),
2018                 prefix, exec_output_to_string(c->std_output),
2019                 prefix, exec_output_to_string(c->std_error));
2020
2021         if (c->tty_path)
2022                 fprintf(f,
2023                         "%sTTYPath: %s\n"
2024                         "%sTTYReset: %s\n"
2025                         "%sTTYVHangup: %s\n"
2026                         "%sTTYVTDisallocate: %s\n",
2027                         prefix, c->tty_path,
2028                         prefix, yes_no(c->tty_reset),
2029                         prefix, yes_no(c->tty_vhangup),
2030                         prefix, yes_no(c->tty_vt_disallocate));
2031
2032         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2033             c->std_output == EXEC_OUTPUT_KMSG ||
2034             c->std_output == EXEC_OUTPUT_JOURNAL ||
2035             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2036             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2037             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2038             c->std_error == EXEC_OUTPUT_SYSLOG ||
2039             c->std_error == EXEC_OUTPUT_KMSG ||
2040             c->std_error == EXEC_OUTPUT_JOURNAL ||
2041             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2042             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2043             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2044
2045                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2046
2047                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2048                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2049
2050                 fprintf(f,
2051                         "%sSyslogFacility: %s\n"
2052                         "%sSyslogLevel: %s\n",
2053                         prefix, strna(fac_str),
2054                         prefix, strna(lvl_str));
2055         }
2056
2057         if (c->capabilities) {
2058                 _cleanup_cap_free_charp_ char *t;
2059
2060                 t = cap_to_text(c->capabilities, NULL);
2061                 if (t)
2062                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2063         }
2064
2065         if (c->secure_bits)
2066                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2067                         prefix,
2068                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2069                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2070                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2071                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2072                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2073                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2074
2075         if (c->capability_bounding_set_drop) {
2076                 unsigned long l;
2077                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2078
2079                 for (l = 0; l <= cap_last_cap(); l++)
2080                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2081                                 _cleanup_cap_free_charp_ char *t;
2082
2083                                 t = cap_to_name(l);
2084                                 if (t)
2085                                         fprintf(f, " %s", t);
2086                         }
2087
2088                 fputs("\n", f);
2089         }
2090
2091         if (c->user)
2092                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2093         if (c->group)
2094                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2095
2096         if (strv_length(c->supplementary_groups) > 0) {
2097                 fprintf(f, "%sSupplementaryGroups:", prefix);
2098                 strv_fprintf(f, c->supplementary_groups);
2099                 fputs("\n", f);
2100         }
2101
2102         if (c->pam_name)
2103                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2104
2105         if (strv_length(c->read_write_dirs) > 0) {
2106                 fprintf(f, "%sReadWriteDirs:", prefix);
2107                 strv_fprintf(f, c->read_write_dirs);
2108                 fputs("\n", f);
2109         }
2110
2111         if (strv_length(c->read_only_dirs) > 0) {
2112                 fprintf(f, "%sReadOnlyDirs:", prefix);
2113                 strv_fprintf(f, c->read_only_dirs);
2114                 fputs("\n", f);
2115         }
2116
2117         if (strv_length(c->inaccessible_dirs) > 0) {
2118                 fprintf(f, "%sInaccessibleDirs:", prefix);
2119                 strv_fprintf(f, c->inaccessible_dirs);
2120                 fputs("\n", f);
2121         }
2122
2123         if (c->utmp_id)
2124                 fprintf(f,
2125                         "%sUtmpIdentifier: %s\n",
2126                         prefix, c->utmp_id);
2127
2128         if (c->selinux_context)
2129                 fprintf(f,
2130                         "%sSELinuxContext: %s%s\n",
2131                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2132
2133         if (c->syscall_filter) {
2134 #ifdef HAVE_SECCOMP
2135                 Iterator j;
2136                 void *id;
2137                 bool first = true;
2138 #endif
2139
2140                 fprintf(f,
2141                         "%sSystemCallFilter: ",
2142                         prefix);
2143
2144                 if (!c->syscall_whitelist)
2145                         fputc('~', f);
2146
2147 #ifdef HAVE_SECCOMP
2148                 SET_FOREACH(id, c->syscall_filter, j) {
2149                         _cleanup_free_ char *name = NULL;
2150
2151                         if (first)
2152                                 first = false;
2153                         else
2154                                 fputc(' ', f);
2155
2156                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2157                         fputs(strna(name), f);
2158                 }
2159 #endif
2160
2161                 fputc('\n', f);
2162         }
2163
2164         if (c->syscall_archs) {
2165 #ifdef HAVE_SECCOMP
2166                 Iterator j;
2167                 void *id;
2168 #endif
2169
2170                 fprintf(f,
2171                         "%sSystemCallArchitectures:",
2172                         prefix);
2173
2174 #ifdef HAVE_SECCOMP
2175                 SET_FOREACH(id, c->syscall_archs, j)
2176                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2177 #endif
2178                 fputc('\n', f);
2179         }
2180
2181         if (c->syscall_errno != 0)
2182                 fprintf(f,
2183                         "%sSystemCallErrorNumber: %s\n",
2184                         prefix, strna(errno_to_name(c->syscall_errno)));
2185 }
2186
2187 void exec_status_start(ExecStatus *s, pid_t pid) {
2188         assert(s);
2189
2190         zero(*s);
2191         s->pid = pid;
2192         dual_timestamp_get(&s->start_timestamp);
2193 }
2194
2195 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2196         assert(s);
2197
2198         if (s->pid && s->pid != pid)
2199                 zero(*s);
2200
2201         s->pid = pid;
2202         dual_timestamp_get(&s->exit_timestamp);
2203
2204         s->code = code;
2205         s->status = status;
2206
2207         if (context) {
2208                 if (context->utmp_id)
2209                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2210
2211                 exec_context_tty_reset(context);
2212         }
2213 }
2214
2215 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2216         char buf[FORMAT_TIMESTAMP_MAX];
2217
2218         assert(s);
2219         assert(f);
2220
2221         if (!prefix)
2222                 prefix = "";
2223
2224         if (s->pid <= 0)
2225                 return;
2226
2227         fprintf(f,
2228                 "%sPID: "PID_FMT"\n",
2229                 prefix, s->pid);
2230
2231         if (s->start_timestamp.realtime > 0)
2232                 fprintf(f,
2233                         "%sStart Timestamp: %s\n",
2234                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2235
2236         if (s->exit_timestamp.realtime > 0)
2237                 fprintf(f,
2238                         "%sExit Timestamp: %s\n"
2239                         "%sExit Code: %s\n"
2240                         "%sExit Status: %i\n",
2241                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2242                         prefix, sigchld_code_to_string(s->code),
2243                         prefix, s->status);
2244 }
2245
2246 char *exec_command_line(char **argv) {
2247         size_t k;
2248         char *n, *p, **a;
2249         bool first = true;
2250
2251         assert(argv);
2252
2253         k = 1;
2254         STRV_FOREACH(a, argv)
2255                 k += strlen(*a)+3;
2256
2257         if (!(n = new(char, k)))
2258                 return NULL;
2259
2260         p = n;
2261         STRV_FOREACH(a, argv) {
2262
2263                 if (!first)
2264                         *(p++) = ' ';
2265                 else
2266                         first = false;
2267
2268                 if (strpbrk(*a, WHITESPACE)) {
2269                         *(p++) = '\'';
2270                         p = stpcpy(p, *a);
2271                         *(p++) = '\'';
2272                 } else
2273                         p = stpcpy(p, *a);
2274
2275         }
2276
2277         *p = 0;
2278
2279         /* FIXME: this doesn't really handle arguments that have
2280          * spaces and ticks in them */
2281
2282         return n;
2283 }
2284
2285 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2286         char *p2;
2287         const char *prefix2;
2288
2289         char *cmd;
2290
2291         assert(c);
2292         assert(f);
2293
2294         if (!prefix)
2295                 prefix = "";
2296         p2 = strappend(prefix, "\t");
2297         prefix2 = p2 ? p2 : prefix;
2298
2299         cmd = exec_command_line(c->argv);
2300
2301         fprintf(f,
2302                 "%sCommand Line: %s\n",
2303                 prefix, cmd ? cmd : strerror(ENOMEM));
2304
2305         free(cmd);
2306
2307         exec_status_dump(&c->exec_status, f, prefix2);
2308
2309         free(p2);
2310 }
2311
2312 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2313         assert(f);
2314
2315         if (!prefix)
2316                 prefix = "";
2317
2318         LIST_FOREACH(command, c, c)
2319                 exec_command_dump(c, f, prefix);
2320 }
2321
2322 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2323         ExecCommand *end;
2324
2325         assert(l);
2326         assert(e);
2327
2328         if (*l) {
2329                 /* It's kind of important, that we keep the order here */
2330                 LIST_FIND_TAIL(command, *l, end);
2331                 LIST_INSERT_AFTER(command, *l, end, e);
2332         } else
2333               *l = e;
2334 }
2335
2336 int exec_command_set(ExecCommand *c, const char *path, ...) {
2337         va_list ap;
2338         char **l, *p;
2339
2340         assert(c);
2341         assert(path);
2342
2343         va_start(ap, path);
2344         l = strv_new_ap(path, ap);
2345         va_end(ap);
2346
2347         if (!l)
2348                 return -ENOMEM;
2349
2350         p = strdup(path);
2351         if (!p) {
2352                 strv_free(l);
2353                 return -ENOMEM;
2354         }
2355
2356         free(c->path);
2357         c->path = p;
2358
2359         strv_free(c->argv);
2360         c->argv = l;
2361
2362         return 0;
2363 }
2364
2365 static int exec_runtime_allocate(ExecRuntime **rt) {
2366
2367         if (*rt)
2368                 return 0;
2369
2370         *rt = new0(ExecRuntime, 1);
2371         if (!*rt)
2372                 return -ENOMEM;
2373
2374         (*rt)->n_ref = 1;
2375         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2376
2377         return 0;
2378 }
2379
2380 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2381         int r;
2382
2383         assert(rt);
2384         assert(c);
2385         assert(id);
2386
2387         if (*rt)
2388                 return 1;
2389
2390         if (!c->private_network && !c->private_tmp)
2391                 return 0;
2392
2393         r = exec_runtime_allocate(rt);
2394         if (r < 0)
2395                 return r;
2396
2397         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2398                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2399                         return -errno;
2400         }
2401
2402         if (c->private_tmp && !(*rt)->tmp_dir) {
2403                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2404                 if (r < 0)
2405                         return r;
2406         }
2407
2408         return 1;
2409 }
2410
2411 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2412         assert(r);
2413         assert(r->n_ref > 0);
2414
2415         r->n_ref++;
2416         return r;
2417 }
2418
2419 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2420
2421         if (!r)
2422                 return NULL;
2423
2424         assert(r->n_ref > 0);
2425
2426         r->n_ref--;
2427         if (r->n_ref <= 0) {
2428                 free(r->tmp_dir);
2429                 free(r->var_tmp_dir);
2430                 close_pipe(r->netns_storage_socket);
2431                 free(r);
2432         }
2433
2434         return NULL;
2435 }
2436
2437 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2438         assert(u);
2439         assert(f);
2440         assert(fds);
2441
2442         if (!rt)
2443                 return 0;
2444
2445         if (rt->tmp_dir)
2446                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2447
2448         if (rt->var_tmp_dir)
2449                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2450
2451         if (rt->netns_storage_socket[0] >= 0) {
2452                 int copy;
2453
2454                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2455                 if (copy < 0)
2456                         return copy;
2457
2458                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2459         }
2460
2461         if (rt->netns_storage_socket[1] >= 0) {
2462                 int copy;
2463
2464                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2465                 if (copy < 0)
2466                         return copy;
2467
2468                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2469         }
2470
2471         return 0;
2472 }
2473
2474 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2475         int r;
2476
2477         assert(rt);
2478         assert(key);
2479         assert(value);
2480
2481         if (streq(key, "tmp-dir")) {
2482                 char *copy;
2483
2484                 r = exec_runtime_allocate(rt);
2485                 if (r < 0)
2486                         return r;
2487
2488                 copy = strdup(value);
2489                 if (!copy)
2490                         return log_oom();
2491
2492                 free((*rt)->tmp_dir);
2493                 (*rt)->tmp_dir = copy;
2494
2495         } else if (streq(key, "var-tmp-dir")) {
2496                 char *copy;
2497
2498                 r = exec_runtime_allocate(rt);
2499                 if (r < 0)
2500                         return r;
2501
2502                 copy = strdup(value);
2503                 if (!copy)
2504                         return log_oom();
2505
2506                 free((*rt)->var_tmp_dir);
2507                 (*rt)->var_tmp_dir = copy;
2508
2509         } else if (streq(key, "netns-socket-0")) {
2510                 int fd;
2511
2512                 r = exec_runtime_allocate(rt);
2513                 if (r < 0)
2514                         return r;
2515
2516                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2517                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2518                 else {
2519                         if ((*rt)->netns_storage_socket[0] >= 0)
2520                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2521
2522                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2523                 }
2524         } else if (streq(key, "netns-socket-1")) {
2525                 int fd;
2526
2527                 r = exec_runtime_allocate(rt);
2528                 if (r < 0)
2529                         return r;
2530
2531                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2532                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2533                 else {
2534                         if ((*rt)->netns_storage_socket[1] >= 0)
2535                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2536
2537                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2538                 }
2539         } else
2540                 return 0;
2541
2542         return 1;
2543 }
2544
/* Job function that removes the directory tree whose path is passed
 * in p. Ownership of the path string is taken: it is freed when the
 * removal is done. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2551
2552 void exec_runtime_destroy(ExecRuntime *rt) {
2553         if (!rt)
2554                 return;
2555
2556         /* If there are multiple users of this, let's leave the stuff around */
2557         if (rt->n_ref > 1)
2558                 return;
2559
2560         if (rt->tmp_dir) {
2561                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2562                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2563                 rt->tmp_dir = NULL;
2564         }
2565
2566         if (rt->var_tmp_dir) {
2567                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2568                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2569                 rt->var_tmp_dir = NULL;
2570         }
2571
2572         close_pipe(rt->netns_storage_socket);
2573 }
2574
2575 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2576         [EXEC_INPUT_NULL] = "null",
2577         [EXEC_INPUT_TTY] = "tty",
2578         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2579         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2580         [EXEC_INPUT_SOCKET] = "socket"
2581 };
2582
2583 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2584
2585 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2586         [EXEC_OUTPUT_INHERIT] = "inherit",
2587         [EXEC_OUTPUT_NULL] = "null",
2588         [EXEC_OUTPUT_TTY] = "tty",
2589         [EXEC_OUTPUT_SYSLOG] = "syslog",
2590         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2591         [EXEC_OUTPUT_KMSG] = "kmsg",
2592         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2593         [EXEC_OUTPUT_JOURNAL] = "journal",
2594         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2595         [EXEC_OUTPUT_SOCKET] = "socket"
2596 };
2597
2598 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);