chiark / gitweb /
nspawn,man: use a common vocabulary when referring to selinux security contexts
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #include "execute.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "sd-messages.h"
61 #include "ioprio.h"
62 #include "securebits.h"
63 #include "namespace.h"
64 #include "tcpwrap.h"
65 #include "exit-status.h"
66 #include "missing.h"
67 #include "utmp-wtmp.h"
68 #include "def.h"
69 #include "path-util.h"
70 #include "syscall-list.h"
71 #include "env-util.h"
72 #include "fileio.h"
73 #include "unit.h"
74 #include "async.h"
75 #include "selinux-util.h"
76
/* Timeouts handed to fd_wait_for_event() in do_idle_pipe_dance(): the
 * first one bounds the initial wait on the idle pipe, the second one the
 * follow-up wait after we signalled that we want to continue. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() puts on the terminal.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Size passed to fd_inc_sndbuf() for the socket opened in connect_logger_as() */
#define SNDBUF_SIZE (8*1024*1024)
84
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. the block directly following
 * stdin/stdout/stderr. The array is modified in place: every entry that
 * had to be moved is replaced by its duplicate and the old descriptor is
 * closed.
 *
 * Returns 0 on success, or a negative errno-style value if duplicating a
 * descriptor failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        /* Keep making passes over the array until one pass manages to
         * place every descriptor on its final slot. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits where it belongs */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If
                         * the target itself was still taken, note the first
                         * such index so the next pass restarts from there. */
                        if (retry_at < 0 && dup_fd != target)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
128
/* Prepares the passed descriptors for being handed to a child: sets or
 * clears O_NONBLOCK according to 'nonblock' and always clears FD_CLOEXEC.
 *
 * Returns 0 on success, or the negative error of the first helper call
 * that failed. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: the whole point
                 * of these fds is that they survive into our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
155
156 _pure_ static const char *tty_path(const ExecContext *context) {
157         assert(context);
158
159         if (context->tty_path)
160                 return context->tty_path;
161
162         return "/dev/console";
163 }
164
165 static void exec_context_tty_reset(const ExecContext *context) {
166         assert(context);
167
168         if (context->tty_vhangup)
169                 terminal_vhangup(tty_path(context));
170
171         if (context->tty_reset)
172                 reset_terminal(tty_path(context));
173
174         if (context->tty_vt_disallocate && context->tty_path)
175                 vt_disallocate(context->tty_path);
176 }
177
178 static bool is_terminal_output(ExecOutput o) {
179         return
180                 o == EXEC_OUTPUT_TTY ||
181                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
182                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
183                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
184 }
185
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes
 * it available as descriptor 'nfd'.
 *
 * Returns nfd on success, a negative errno-style value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed us the wanted slot */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        /* The temporary descriptor is not needed either way */
        close_nointr_nofail(null_fd);

        return result;
}
203
204 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
205         int fd, r;
206         union sockaddr_union sa = {
207                 .un.sun_family = AF_UNIX,
208                 .un.sun_path = "/run/systemd/journal/stdout",
209         };
210
211         assert(context);
212         assert(output < _EXEC_OUTPUT_MAX);
213         assert(ident);
214         assert(nfd >= 0);
215
216         fd = socket(AF_UNIX, SOCK_STREAM, 0);
217         if (fd < 0)
218                 return -errno;
219
220         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
221         if (r < 0) {
222                 close_nointr_nofail(fd);
223                 return -errno;
224         }
225
226         if (shutdown(fd, SHUT_RD) < 0) {
227                 close_nointr_nofail(fd);
228                 return -errno;
229         }
230
231         fd_inc_sndbuf(fd, SNDBUF_SIZE);
232
233         dprintf(fd,
234                 "%s\n"
235                 "%s\n"
236                 "%i\n"
237                 "%i\n"
238                 "%i\n"
239                 "%i\n"
240                 "%i\n",
241                 context->syslog_identifier ? context->syslog_identifier : ident,
242                 unit_id,
243                 context->syslog_priority,
244                 !!context->syslog_level_prefix,
245                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
246                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
247                 is_terminal_output(output));
248
249         if (fd != nfd) {
250                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
251                 close_nointr_nofail(fd);
252         } else
253                 r = nfd;
254
255         return r;
256 }
/* Opens the terminal at 'path' via open_terminal() (with O_NOCTTY added
 * to 'mode') and makes it available as descriptor 'nfd'.
 *
 * Returns nfd on success, a negative error value otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(term_fd);

        return result;
}
274
275 static bool is_terminal_input(ExecInput i) {
276         return
277                 i == EXEC_INPUT_TTY ||
278                 i == EXEC_INPUT_TTY_FORCE ||
279                 i == EXEC_INPUT_TTY_FAIL;
280 }
281
282 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
283
284         if (is_terminal_input(std_input) && !apply_tty_stdin)
285                 return EXEC_INPUT_NULL;
286
287         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
288                 return EXEC_INPUT_NULL;
289
290         return std_input;
291 }
292
293 static int fixup_output(ExecOutput std_output, int socket_fd) {
294
295         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
296                 return EXEC_OUTPUT_INHERIT;
297
298         return std_output;
299 }
300
301 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
302         ExecInput i;
303
304         assert(context);
305
306         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
307
308         switch (i) {
309
310         case EXEC_INPUT_NULL:
311                 return open_null_as(O_RDONLY, STDIN_FILENO);
312
313         case EXEC_INPUT_TTY:
314         case EXEC_INPUT_TTY_FORCE:
315         case EXEC_INPUT_TTY_FAIL: {
316                 int fd, r;
317
318                 fd = acquire_terminal(tty_path(context),
319                                       i == EXEC_INPUT_TTY_FAIL,
320                                       i == EXEC_INPUT_TTY_FORCE,
321                                       false,
322                                       (usec_t) -1);
323                 if (fd < 0)
324                         return fd;
325
326                 if (fd != STDIN_FILENO) {
327                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
328                         close_nointr_nofail(fd);
329                 } else
330                         r = STDIN_FILENO;
331
332                 return r;
333         }
334
335         case EXEC_INPUT_SOCKET:
336                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
337
338         default:
339                 assert_not_reached("Unknown input type");
340         }
341 }
342
343 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
344         ExecOutput o;
345         ExecInput i;
346         int r;
347
348         assert(context);
349         assert(ident);
350
351         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
352         o = fixup_output(context->std_output, socket_fd);
353
354         if (fileno == STDERR_FILENO) {
355                 ExecOutput e;
356                 e = fixup_output(context->std_error, socket_fd);
357
358                 /* This expects the input and output are already set up */
359
360                 /* Don't change the stderr file descriptor if we inherit all
361                  * the way and are not on a tty */
362                 if (e == EXEC_OUTPUT_INHERIT &&
363                     o == EXEC_OUTPUT_INHERIT &&
364                     i == EXEC_INPUT_NULL &&
365                     !is_terminal_input(context->std_input) &&
366                     getppid () != 1)
367                         return fileno;
368
369                 /* Duplicate from stdout if possible */
370                 if (e == o || e == EXEC_OUTPUT_INHERIT)
371                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
372
373                 o = e;
374
375         } else if (o == EXEC_OUTPUT_INHERIT) {
376                 /* If input got downgraded, inherit the original value */
377                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
378                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
379
380                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
381                 if (i != EXEC_INPUT_NULL)
382                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
383
384                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
385                 if (getppid() != 1)
386                         return fileno;
387
388                 /* We need to open /dev/null here anew, to get the right access mode. */
389                 return open_null_as(O_WRONLY, fileno);
390         }
391
392         switch (o) {
393
394         case EXEC_OUTPUT_NULL:
395                 return open_null_as(O_WRONLY, fileno);
396
397         case EXEC_OUTPUT_TTY:
398                 if (is_terminal_input(i))
399                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
400
401                 /* We don't reset the terminal if this is just about output */
402                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
403
404         case EXEC_OUTPUT_SYSLOG:
405         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
406         case EXEC_OUTPUT_KMSG:
407         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
408         case EXEC_OUTPUT_JOURNAL:
409         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
410                 r = connect_logger_as(context, o, ident, unit_id, fileno);
411                 if (r < 0) {
412                         log_struct_unit(LOG_CRIT, unit_id,
413                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
414                                 fileno == STDOUT_FILENO ? "out" : "err",
415                                 unit_id, strerror(-r),
416                                 "ERRNO=%d", -r,
417                                 NULL);
418                         r = open_null_as(O_WRONLY, fileno);
419                 }
420                 return r;
421
422         case EXEC_OUTPUT_SOCKET:
423                 assert(socket_fd >= 0);
424                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
425
426         default:
427                 assert_not_reached("Unknown error type");
428         }
429 }
430
431 static int chown_terminal(int fd, uid_t uid) {
432         struct stat st;
433
434         assert(fd >= 0);
435
436         /* This might fail. What matters are the results. */
437         (void) fchown(fd, uid, -1);
438         (void) fchmod(fd, TTY_MODE);
439
440         if (fstat(fd, &st) < 0)
441                 return -errno;
442
443         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
444                 return -EPERM;
445
446         return 0;
447 }
448
449 static int setup_confirm_stdio(int *_saved_stdin,
450                                int *_saved_stdout) {
451         int fd = -1, saved_stdin, saved_stdout = -1, r;
452
453         assert(_saved_stdin);
454         assert(_saved_stdout);
455
456         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
457         if (saved_stdin < 0)
458                 return -errno;
459
460         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
461         if (saved_stdout < 0) {
462                 r = errno;
463                 goto fail;
464         }
465
466         fd = acquire_terminal(
467                         "/dev/console",
468                         false,
469                         false,
470                         false,
471                         DEFAULT_CONFIRM_USEC);
472         if (fd < 0) {
473                 r = fd;
474                 goto fail;
475         }
476
477         r = chown_terminal(fd, getuid());
478         if (r < 0)
479                 goto fail;
480
481         if (dup2(fd, STDIN_FILENO) < 0) {
482                 r = -errno;
483                 goto fail;
484         }
485
486         if (dup2(fd, STDOUT_FILENO) < 0) {
487                 r = -errno;
488                 goto fail;
489         }
490
491         if (fd >= 2)
492                 close_nointr_nofail(fd);
493
494         *_saved_stdin = saved_stdin;
495         *_saved_stdout = saved_stdout;
496
497         return 0;
498
499 fail:
500         if (saved_stdout >= 0)
501                 close_nointr_nofail(saved_stdout);
502
503         if (saved_stdin >= 0)
504                 close_nointr_nofail(saved_stdin);
505
506         if (fd >= 0)
507                 close_nointr_nofail(fd);
508
509         return r;
510 }
511
512 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
513         int fd;
514         va_list ap;
515
516         assert(format);
517
518         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
519         if (fd < 0)
520                 return fd;
521
522         va_start(ap, format);
523         vdprintf(fd, format, ap);
524         va_end(ap);
525
526         close_nointr_nofail(fd);
527
528         return 0;
529 }
530
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back onto stdin/stdout and closes the saved copies.
 * Negative saved descriptors are skipped.
 *
 * Returns 0 on success, or the negative errno-style value of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0, in_fd, out_fd;

        assert(saved_stdin);
        assert(saved_stdout);

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        release_terminal();

        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                r = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                r = -errno;

        /* The saved copies are closed even if restoring failed */
        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return r;
}
557
/* Asks on the console whether the command line in 'argv' shall be
 * executed. The answer character (one of "yns") is stored in *response
 * by ask().
 *
 * Returns the result of ask(), the negative error from
 * setup_confirm_stdio(), or -ENOMEM if the command line could not be
 * formatted. Once the console has been set up, stdin/stdout are always
 * restored before returning. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line)
                /* Do not bail out right here: stdio has been redirected
                 * already and must be put back below. */
                r = -ENOMEM;
        else {
                r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
                free(line);
        }

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
577
578 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
579         bool keep_groups = false;
580         int r;
581
582         assert(context);
583
584         /* Lookup and set GID and supplementary group list. Here too
585          * we avoid NSS lookups for gid=0. */
586
587         if (context->group || username) {
588
589                 if (context->group) {
590                         const char *g = context->group;
591
592                         if ((r = get_group_creds(&g, &gid)) < 0)
593                                 return r;
594                 }
595
596                 /* First step, initialize groups from /etc/groups */
597                 if (username && gid != 0) {
598                         if (initgroups(username, gid) < 0)
599                                 return -errno;
600
601                         keep_groups = true;
602                 }
603
604                 /* Second step, set our gids */
605                 if (setresgid(gid, gid, gid) < 0)
606                         return -errno;
607         }
608
609         if (context->supplementary_groups) {
610                 int ngroups_max, k;
611                 gid_t *gids;
612                 char **i;
613
614                 /* Final step, initialize any manually set supplementary groups */
615                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
616
617                 if (!(gids = new(gid_t, ngroups_max)))
618                         return -ENOMEM;
619
620                 if (keep_groups) {
621                         if ((k = getgroups(ngroups_max, gids)) < 0) {
622                                 free(gids);
623                                 return -errno;
624                         }
625                 } else
626                         k = 0;
627
628                 STRV_FOREACH(i, context->supplementary_groups) {
629                         const char *g;
630
631                         if (k >= ngroups_max) {
632                                 free(gids);
633                                 return -E2BIG;
634                         }
635
636                         g = *i;
637                         r = get_group_creds(&g, gids+k);
638                         if (r < 0) {
639                                 free(gids);
640                                 return r;
641                         }
642
643                         k++;
644                 }
645
646                 if (setgroups(k, gids) < 0) {
647                         free(gids);
648                         return -errno;
649                 }
650
651                 free(gids);
652         }
653
654         return 0;
655 }
656
657 static int enforce_user(const ExecContext *context, uid_t uid) {
658         assert(context);
659
660         /* Sets (but doesn't lookup) the uid and make sure we keep the
661          * capabilities while doing so. */
662
663         if (context->capabilities) {
664                 _cleanup_cap_free_ cap_t d = NULL;
665                 static const cap_value_t bits[] = {
666                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
667                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
668                 };
669
670                 /* First step: If we need to keep capabilities but
671                  * drop privileges we need to make sure we keep our
672                  * caps, while we drop privileges. */
673                 if (uid != 0) {
674                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
675
676                         if (prctl(PR_GET_SECUREBITS) != sb)
677                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
678                                         return -errno;
679                 }
680
681                 /* Second step: set the capabilities. This will reduce
682                  * the capabilities to the minimum we need. */
683
684                 d = cap_dup(context->capabilities);
685                 if (!d)
686                         return -errno;
687
688                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
689                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
690                         return -errno;
691
692                 if (cap_set_proc(d) < 0)
693                         return -errno;
694         }
695
696         /* Third step: actually set the uids */
697         if (setresuid(uid, uid, uid) < 0)
698                 return -errno;
699
700         /* At this point we should have all necessary capabilities but
701            are otherwise a normal user. However, the caps might got
702            corrupted due to the setresuid() so we need clean them up
703            later. This is done outside of this call. */
704
705         return 0;
706 }
707
708 #ifdef HAVE_PAM
709
710 static int null_conv(
711                 int num_msg,
712                 const struct pam_message **msg,
713                 struct pam_response **resp,
714                 void *appdata_ptr) {
715
716         /* We don't support conversations */
717
718         return PAM_CONV_ERR;
719 }
720
721 static int setup_pam(
722                 const char *name,
723                 const char *user,
724                 uid_t uid,
725                 const char *tty,
726                 char ***pam_env,
727                 int fds[], unsigned n_fds) {
728
729         static const struct pam_conv conv = {
730                 .conv = null_conv,
731                 .appdata_ptr = NULL
732         };
733
734         pam_handle_t *handle = NULL;
735         sigset_t ss, old_ss;
736         int pam_code = PAM_SUCCESS;
737         int err;
738         char **e = NULL;
739         bool close_session = false;
740         pid_t pam_pid = 0, parent_pid;
741         int flags = 0;
742
743         assert(name);
744         assert(user);
745         assert(pam_env);
746
747         /* We set up PAM in the parent process, then fork. The child
748          * will then stay around until killed via PR_GET_PDEATHSIG or
749          * systemd via the cgroup logic. It will then remove the PAM
750          * session again. The parent process will exec() the actual
751          * daemon. We do things this way to ensure that the main PID
752          * of the daemon is the one we initially fork()ed. */
753
754         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
755                 flags |= PAM_SILENT;
756
757         pam_code = pam_start(name, user, &conv, &handle);
758         if (pam_code != PAM_SUCCESS) {
759                 handle = NULL;
760                 goto fail;
761         }
762
763         if (tty) {
764                 pam_code = pam_set_item(handle, PAM_TTY, tty);
765                 if (pam_code != PAM_SUCCESS)
766                         goto fail;
767         }
768
769         pam_code = pam_acct_mgmt(handle, flags);
770         if (pam_code != PAM_SUCCESS)
771                 goto fail;
772
773         pam_code = pam_open_session(handle, flags);
774         if (pam_code != PAM_SUCCESS)
775                 goto fail;
776
777         close_session = true;
778
779         e = pam_getenvlist(handle);
780         if (!e) {
781                 pam_code = PAM_BUF_ERR;
782                 goto fail;
783         }
784
785         /* Block SIGTERM, so that we know that it won't get lost in
786          * the child */
787         if (sigemptyset(&ss) < 0 ||
788             sigaddset(&ss, SIGTERM) < 0 ||
789             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
790                 goto fail;
791
792         parent_pid = getpid();
793
794         pam_pid = fork();
795         if (pam_pid < 0)
796                 goto fail;
797
798         if (pam_pid == 0) {
799                 int sig;
800                 int r = EXIT_PAM;
801
802                 /* The child's job is to reset the PAM session on
803                  * termination */
804
805                 /* This string must fit in 10 chars (i.e. the length
806                  * of "/sbin/init"), to look pretty in /bin/ps */
807                 rename_process("(sd-pam)");
808
809                 /* Make sure we don't keep open the passed fds in this
810                 child. We assume that otherwise only those fds are
811                 open here that have been opened by PAM. */
812                 close_many(fds, n_fds);
813
814                 /* Drop privileges - we don't need any to pam_close_session
815                  * and this will make PR_SET_PDEATHSIG work in most cases.
816                  * If this fails, ignore the error - but expect sd-pam threads
817                  * to fail to exit normally */
818                 if (setresuid(uid, uid, uid) < 0)
819                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
820
821                 /* Wait until our parent died. This will only work if
822                  * the above setresuid() succeeds, otherwise the kernel
823                  * will not allow unprivileged parents kill their privileged
824                  * children this way. We rely on the control groups kill logic
825                  * to do the rest for us. */
826                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
827                         goto child_finish;
828
829                 /* Check if our parent process might already have
830                  * died? */
831                 if (getppid() == parent_pid) {
832                         for (;;) {
833                                 if (sigwait(&ss, &sig) < 0) {
834                                         if (errno == EINTR)
835                                                 continue;
836
837                                         goto child_finish;
838                                 }
839
840                                 assert(sig == SIGTERM);
841                                 break;
842                         }
843                 }
844
845                 /* If our parent died we'll end the session */
846                 if (getppid() != parent_pid) {
847                         pam_code = pam_close_session(handle, flags);
848                         if (pam_code != PAM_SUCCESS)
849                                 goto child_finish;
850                 }
851
852                 r = 0;
853
854         child_finish:
855                 pam_end(handle, pam_code | flags);
856                 _exit(r);
857         }
858
859         /* If the child was forked off successfully it will do all the
860          * cleanups, so forget about the handle here. */
861         handle = NULL;
862
863         /* Unblock SIGTERM again in the parent */
864         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
865                 goto fail;
866
867         /* We close the log explicitly here, since the PAM modules
868          * might have opened it, but we don't want this fd around. */
869         closelog();
870
871         *pam_env = e;
872         e = NULL;
873
874         return 0;
875
876 fail:
877         if (pam_code != PAM_SUCCESS) {
878                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
879                 err = -EPERM;  /* PAM errors do not map to errno */
880         } else {
881                 log_error("PAM failed: %m");
882                 err = -errno;
883         }
884
885         if (handle) {
886                 if (close_session)
887                         pam_code = pam_close_session(handle, flags);
888
889                 pam_end(handle, pam_code | flags);
890         }
891
892         strv_free(e);
893
894         closelog();
895
896         if (pam_pid > 1) {
897                 kill(pam_pid, SIGTERM);
898                 kill(pam_pid, SIGCONT);
899         }
900
901         return err;
902 }
903 #endif
904
/* Renames the current process to "(<name>)", where <name> is the last
 * path component of 'path', shortened to its final 8 characters if it is
 * longer. If the component is empty, "(...)" is used instead.
 *
 * The result never exceeds 10 characters (the length of "/sbin/init"),
 * so that it looks pretty in /bin/ps. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *name;
        size_t len;

        name = basename(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually more telling than the
                 * head, which might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        /* '(' + at most 8 characters + ')' + NUL fits the 11 byte buffer */
        process_name[0] = '(';
        memcpy(process_name + 1, name, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = '\0';

        rename_process(process_name);
}
935
936 static int apply_seccomp(uint32_t *syscall_filter) {
937         static const struct sock_filter header[] = {
938                 VALIDATE_ARCHITECTURE,
939                 EXAMINE_SYSCALL
940         };
941         static const struct sock_filter footer[] = {
942                 _KILL_PROCESS
943         };
944
945         int i;
946         unsigned n;
947         struct sock_filter *f;
948         struct sock_fprog prog = {};
949
950         assert(syscall_filter);
951
952         /* First: count the syscalls to check for */
953         for (i = 0, n = 0; i < syscall_max(); i++)
954                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
955                         n++;
956
957         /* Second: build the filter program from a header the syscall
958          * matches and the footer */
959         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
960         memcpy(f, header, sizeof(header));
961
962         for (i = 0, n = 0; i < syscall_max(); i++)
963                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
964                         struct sock_filter item[] = {
965                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
966                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
967                         };
968
969                         assert_cc(ELEMENTSOF(item) == 2);
970
971                         f[ELEMENTSOF(header) + 2*n]  = item[0];
972                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
973
974                         n++;
975                 }
976
977         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
978
979         /* Third: install the filter */
980         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
981         prog.filter = f;
982         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
983                 return -errno;
984
985         return 0;
986 }
987
988 static void do_idle_pipe_dance(int idle_pipe[4]) {
989         assert(idle_pipe);
990
991         if (idle_pipe[1] >= 0)
992                 close_nointr_nofail(idle_pipe[1]);
993         if (idle_pipe[2] >= 0)
994                 close_nointr_nofail(idle_pipe[2]);
995
996         if (idle_pipe[0] >= 0) {
997                 int r;
998
999                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1000
1001                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1002                         /* Signal systemd that we are bored and want to continue. */
1003                         write(idle_pipe[3], "x", 1);
1004
1005                         /* Wait for systemd to react to the signal above. */
1006                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1007                 }
1008
1009                 close_nointr_nofail(idle_pipe[0]);
1010
1011         }
1012
1013         if (idle_pipe[3] >= 0)
1014                 close_nointr_nofail(idle_pipe[3]);
1015 }
1016
1017 static int build_environment(
1018                 ExecContext *c,
1019                 unsigned n_fds,
1020                 usec_t watchdog_usec,
1021                 const char *home,
1022                 const char *username,
1023                 const char *shell,
1024                 char ***ret) {
1025
1026         _cleanup_strv_free_ char **our_env = NULL;
1027         unsigned n_env = 0;
1028         char *x;
1029
1030         assert(c);
1031         assert(ret);
1032
1033         our_env = new0(char*, 10);
1034         if (!our_env)
1035                 return -ENOMEM;
1036
1037         if (n_fds > 0) {
1038                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1039                         return -ENOMEM;
1040                 our_env[n_env++] = x;
1041
1042                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1043                         return -ENOMEM;
1044                 our_env[n_env++] = x;
1045         }
1046
1047         if (watchdog_usec > 0) {
1048                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1049                         return -ENOMEM;
1050                 our_env[n_env++] = x;
1051
1052                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1053                         return -ENOMEM;
1054                 our_env[n_env++] = x;
1055         }
1056
1057         if (home) {
1058                 x = strappend("HOME=", home);
1059                 if (!x)
1060                         return -ENOMEM;
1061                 our_env[n_env++] = x;
1062         }
1063
1064         if (username) {
1065                 x = strappend("LOGNAME=", username);
1066                 if (!x)
1067                         return -ENOMEM;
1068                 our_env[n_env++] = x;
1069
1070                 x = strappend("USER=", username);
1071                 if (!x)
1072                         return -ENOMEM;
1073                 our_env[n_env++] = x;
1074         }
1075
1076         if (shell) {
1077                 x = strappend("SHELL=", shell);
1078                 if (!x)
1079                         return -ENOMEM;
1080                 our_env[n_env++] = x;
1081         }
1082
1083         if (is_terminal_input(c->std_input) ||
1084             c->std_output == EXEC_OUTPUT_TTY ||
1085             c->std_error == EXEC_OUTPUT_TTY ||
1086             c->tty_path) {
1087
1088                 x = strdup(default_term_for_tty(tty_path(c)));
1089                 if (!x)
1090                         return -ENOMEM;
1091                 our_env[n_env++] = x;
1092         }
1093
1094         our_env[n_env++] = NULL;
1095         assert(n_env <= 10);
1096
1097         *ret = our_env;
1098         our_env = NULL;
1099
1100         return 0;
1101 }
1102
1103 int exec_spawn(ExecCommand *command,
1104                char **argv,
1105                ExecContext *context,
1106                int fds[], unsigned n_fds,
1107                char **environment,
1108                bool apply_permissions,
1109                bool apply_chroot,
1110                bool apply_tty_stdin,
1111                bool confirm_spawn,
1112                CGroupControllerMask cgroup_supported,
1113                const char *cgroup_path,
1114                const char *unit_id,
1115                usec_t watchdog_usec,
1116                int idle_pipe[4],
1117                ExecRuntime *runtime,
1118                pid_t *ret) {
1119
1120         _cleanup_strv_free_ char **files_env = NULL;
1121         int socket_fd;
1122         char *line;
1123         pid_t pid;
1124         int r;
1125
1126         assert(command);
1127         assert(context);
1128         assert(ret);
1129         assert(fds || n_fds <= 0);
1130
1131         if (context->std_input == EXEC_INPUT_SOCKET ||
1132             context->std_output == EXEC_OUTPUT_SOCKET ||
1133             context->std_error == EXEC_OUTPUT_SOCKET) {
1134
1135                 if (n_fds != 1)
1136                         return -EINVAL;
1137
1138                 socket_fd = fds[0];
1139
1140                 fds = NULL;
1141                 n_fds = 0;
1142         } else
1143                 socket_fd = -1;
1144
1145         r = exec_context_load_environment(context, &files_env);
1146         if (r < 0) {
1147                 log_struct_unit(LOG_ERR,
1148                            unit_id,
1149                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1150                            "ERRNO=%d", -r,
1151                            NULL);
1152                 return r;
1153         }
1154
1155         if (!argv)
1156                 argv = command->argv;
1157
1158         line = exec_command_line(argv);
1159         if (!line)
1160                 return log_oom();
1161
1162         log_struct_unit(LOG_DEBUG,
1163                         unit_id,
1164                         "EXECUTABLE=%s", command->path,
1165                         "MESSAGE=About to execute: %s", line,
1166                         NULL);
1167         free(line);
1168
1169         pid = fork();
1170         if (pid < 0)
1171                 return -errno;
1172
1173         if (pid == 0) {
1174                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1175                 const char *username = NULL, *home = NULL, *shell = NULL;
1176                 unsigned n_dont_close = 0;
1177                 int dont_close[n_fds + 3];
1178                 uid_t uid = (uid_t) -1;
1179                 gid_t gid = (gid_t) -1;
1180                 sigset_t ss;
1181                 int i, err;
1182
1183                 /* child */
1184
1185                 rename_process_from_path(command->path);
1186
1187                 /* We reset exactly these signals, since they are the
1188                  * only ones we set to SIG_IGN in the main daemon. All
1189                  * others we leave untouched because we set them to
1190                  * SIG_DFL or a valid handler initially, both of which
1191                  * will be demoted to SIG_DFL. */
1192                 default_signals(SIGNALS_CRASH_HANDLER,
1193                                 SIGNALS_IGNORE, -1);
1194
1195                 if (context->ignore_sigpipe)
1196                         ignore_signals(SIGPIPE, -1);
1197
1198                 assert_se(sigemptyset(&ss) == 0);
1199                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1200                         err = -errno;
1201                         r = EXIT_SIGNAL_MASK;
1202                         goto fail_child;
1203                 }
1204
1205                 if (idle_pipe)
1206                         do_idle_pipe_dance(idle_pipe);
1207
1208                 /* Close sockets very early to make sure we don't
1209                  * block init reexecution because it cannot bind its
1210                  * sockets */
1211                 log_forget_fds();
1212
1213                 if (socket_fd >= 0)
1214                         dont_close[n_dont_close++] = socket_fd;
1215                 if (n_fds > 0) {
1216                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1217                         n_dont_close += n_fds;
1218                 }
1219                 if (runtime) {
1220                         if (runtime->netns_storage_socket[0] >= 0)
1221                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1222                         if (runtime->netns_storage_socket[1] >= 0)
1223                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1224                 }
1225
1226                 err = close_all_fds(dont_close, n_dont_close);
1227                 if (err < 0) {
1228                         r = EXIT_FDS;
1229                         goto fail_child;
1230                 }
1231
1232                 if (!context->same_pgrp)
1233                         if (setsid() < 0) {
1234                                 err = -errno;
1235                                 r = EXIT_SETSID;
1236                                 goto fail_child;
1237                         }
1238
1239                 if (context->tcpwrap_name) {
1240                         if (socket_fd >= 0)
1241                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1242                                         err = -EACCES;
1243                                         r = EXIT_TCPWRAP;
1244                                         goto fail_child;
1245                                 }
1246
1247                         for (i = 0; i < (int) n_fds; i++) {
1248                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1249                                         err = -EACCES;
1250                                         r = EXIT_TCPWRAP;
1251                                         goto fail_child;
1252                                 }
1253                         }
1254                 }
1255
1256                 exec_context_tty_reset(context);
1257
1258                 if (confirm_spawn) {
1259                         char response;
1260
1261                         err = ask_for_confirmation(&response, argv);
1262                         if (err == -ETIMEDOUT)
1263                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1264                         else if (err < 0)
1265                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1266                         else if (response == 's') {
1267                                 write_confirm_message("Skipping execution.\n");
1268                                 err = -ECANCELED;
1269                                 r = EXIT_CONFIRM;
1270                                 goto fail_child;
1271                         } else if (response == 'n') {
1272                                 write_confirm_message("Failing execution.\n");
1273                                 err = r = 0;
1274                                 goto fail_child;
1275                         }
1276                 }
1277
1278                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1279                  * must sure to drop O_NONBLOCK */
1280                 if (socket_fd >= 0)
1281                         fd_nonblock(socket_fd, false);
1282
1283                 err = setup_input(context, socket_fd, apply_tty_stdin);
1284                 if (err < 0) {
1285                         r = EXIT_STDIN;
1286                         goto fail_child;
1287                 }
1288
1289                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1290                 if (err < 0) {
1291                         r = EXIT_STDOUT;
1292                         goto fail_child;
1293                 }
1294
1295                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1296                 if (err < 0) {
1297                         r = EXIT_STDERR;
1298                         goto fail_child;
1299                 }
1300
1301                 if (cgroup_path) {
1302                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1303                         if (err < 0) {
1304                                 r = EXIT_CGROUP;
1305                                 goto fail_child;
1306                         }
1307                 }
1308
1309                 if (context->oom_score_adjust_set) {
1310                         char t[16];
1311
1312                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1313                         char_array_0(t);
1314
1315                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1316                                 err = -errno;
1317                                 r = EXIT_OOM_ADJUST;
1318                                 goto fail_child;
1319                         }
1320                 }
1321
1322                 if (context->nice_set)
1323                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1324                                 err = -errno;
1325                                 r = EXIT_NICE;
1326                                 goto fail_child;
1327                         }
1328
1329                 if (context->cpu_sched_set) {
1330                         struct sched_param param = {
1331                                 .sched_priority = context->cpu_sched_priority,
1332                         };
1333
1334                         r = sched_setscheduler(0,
1335                                                context->cpu_sched_policy |
1336                                                (context->cpu_sched_reset_on_fork ?
1337                                                 SCHED_RESET_ON_FORK : 0),
1338                                                &param);
1339                         if (r < 0) {
1340                                 err = -errno;
1341                                 r = EXIT_SETSCHEDULER;
1342                                 goto fail_child;
1343                         }
1344                 }
1345
1346                 if (context->cpuset)
1347                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1348                                 err = -errno;
1349                                 r = EXIT_CPUAFFINITY;
1350                                 goto fail_child;
1351                         }
1352
1353                 if (context->ioprio_set)
1354                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1355                                 err = -errno;
1356                                 r = EXIT_IOPRIO;
1357                                 goto fail_child;
1358                         }
1359
1360                 if (context->timer_slack_nsec != (nsec_t) -1)
1361                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1362                                 err = -errno;
1363                                 r = EXIT_TIMERSLACK;
1364                                 goto fail_child;
1365                         }
1366
1367                 if (context->utmp_id)
1368                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1369
1370                 if (context->user) {
1371                         username = context->user;
1372                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1373                         if (err < 0) {
1374                                 r = EXIT_USER;
1375                                 goto fail_child;
1376                         }
1377
1378                         if (is_terminal_input(context->std_input)) {
1379                                 err = chown_terminal(STDIN_FILENO, uid);
1380                                 if (err < 0) {
1381                                         r = EXIT_STDIN;
1382                                         goto fail_child;
1383                                 }
1384                         }
1385                 }
1386
1387 #ifdef HAVE_PAM
1388                 if (cgroup_path && context->user && context->pam_name) {
1389                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1390                         if (err < 0) {
1391                                 r = EXIT_CGROUP;
1392                                 goto fail_child;
1393                         }
1394
1395
1396                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1397                         if (err < 0) {
1398                                 r = EXIT_CGROUP;
1399                                 goto fail_child;
1400                         }
1401                 }
1402 #endif
1403
1404                 if (apply_permissions) {
1405                         err = enforce_groups(context, username, gid);
1406                         if (err < 0) {
1407                                 r = EXIT_GROUP;
1408                                 goto fail_child;
1409                         }
1410                 }
1411
1412                 umask(context->umask);
1413
1414 #ifdef HAVE_PAM
1415                 if (apply_permissions && context->pam_name && username) {
1416                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1417                         if (err < 0) {
1418                                 r = EXIT_PAM;
1419                                 goto fail_child;
1420                         }
1421                 }
1422 #endif
1423                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1424                         err = setup_netns(runtime->netns_storage_socket);
1425                         if (err < 0) {
1426                                 r = EXIT_NETWORK;
1427                                 goto fail_child;
1428                         }
1429                 }
1430
1431                 if (!strv_isempty(context->read_write_dirs) ||
1432                     !strv_isempty(context->read_only_dirs) ||
1433                     !strv_isempty(context->inaccessible_dirs) ||
1434                     context->mount_flags != 0 ||
1435                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1436                     context->private_devices) {
1437
1438                         char *tmp = NULL, *var = NULL;
1439
1440                         /* The runtime struct only contains the parent
1441                          * of the private /tmp, which is
1442                          * non-accessible to world users. Inside of it
1443                          * there's a /tmp that is sticky, and that's
1444                          * the one we want to use here. */
1445
1446                         if (context->private_tmp && runtime) {
1447                                 if (runtime->tmp_dir)
1448                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1449                                 if (runtime->var_tmp_dir)
1450                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1451                         }
1452
1453                         err = setup_namespace(
1454                                         context->read_write_dirs,
1455                                         context->read_only_dirs,
1456                                         context->inaccessible_dirs,
1457                                         tmp,
1458                                         var,
1459                                         context->private_devices,
1460                                         context->mount_flags);
1461
1462                         if (err < 0) {
1463                                 r = EXIT_NAMESPACE;
1464                                 goto fail_child;
1465                         }
1466                 }
1467
1468                 if (apply_chroot) {
1469                         if (context->root_directory)
1470                                 if (chroot(context->root_directory) < 0) {
1471                                         err = -errno;
1472                                         r = EXIT_CHROOT;
1473                                         goto fail_child;
1474                                 }
1475
1476                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1477                                 err = -errno;
1478                                 r = EXIT_CHDIR;
1479                                 goto fail_child;
1480                         }
1481                 } else {
1482                         _cleanup_free_ char *d = NULL;
1483
1484                         if (asprintf(&d, "%s/%s",
1485                                      context->root_directory ? context->root_directory : "",
1486                                      context->working_directory ? context->working_directory : "") < 0) {
1487                                 err = -ENOMEM;
1488                                 r = EXIT_MEMORY;
1489                                 goto fail_child;
1490                         }
1491
1492                         if (chdir(d) < 0) {
1493                                 err = -errno;
1494                                 r = EXIT_CHDIR;
1495                                 goto fail_child;
1496                         }
1497                 }
1498
1499                 /* We repeat the fd closing here, to make sure that
1500                  * nothing is leaked from the PAM modules */
1501                 err = close_all_fds(fds, n_fds);
1502                 if (err >= 0)
1503                         err = shift_fds(fds, n_fds);
1504                 if (err >= 0)
1505                         err = flags_fds(fds, n_fds, context->non_blocking);
1506                 if (err < 0) {
1507                         r = EXIT_FDS;
1508                         goto fail_child;
1509                 }
1510
1511                 if (apply_permissions) {
1512
1513                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1514                                 if (!context->rlimit[i])
1515                                         continue;
1516
1517                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1518                                         err = -errno;
1519                                         r = EXIT_LIMITS;
1520                                         goto fail_child;
1521                                 }
1522                         }
1523
1524                         if (context->capability_bounding_set_drop) {
1525                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1526                                 if (err < 0) {
1527                                         r = EXIT_CAPABILITIES;
1528                                         goto fail_child;
1529                                 }
1530                         }
1531
1532                         if (context->user) {
1533                                 err = enforce_user(context, uid);
1534                                 if (err < 0) {
1535                                         r = EXIT_USER;
1536                                         goto fail_child;
1537                                 }
1538                         }
1539
1540                         /* PR_GET_SECUREBITS is not privileged, while
1541                          * PR_SET_SECUREBITS is. So to suppress
1542                          * potential EPERMs we'll try not to call
1543                          * PR_SET_SECUREBITS unless necessary. */
1544                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1545                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1546                                         err = -errno;
1547                                         r = EXIT_SECUREBITS;
1548                                         goto fail_child;
1549                                 }
1550
1551                         if (context->capabilities)
1552                                 if (cap_set_proc(context->capabilities) < 0) {
1553                                         err = -errno;
1554                                         r = EXIT_CAPABILITIES;
1555                                         goto fail_child;
1556                                 }
1557
1558                         if (context->no_new_privileges)
1559                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1560                                         err = -errno;
1561                                         r = EXIT_NO_NEW_PRIVILEGES;
1562                                         goto fail_child;
1563                                 }
1564
1565                         if (context->syscall_filter) {
1566                                 err = apply_seccomp(context->syscall_filter);
1567                                 if (err < 0) {
1568                                         r = EXIT_SECCOMP;
1569                                         goto fail_child;
1570                                 }
1571                         }
1572 #ifdef HAVE_SELINUX
1573                         if (context->selinux_context && use_selinux()) {
1574                                 bool ignore;
1575                                 char* c;
1576
1577                                 c = context->selinux_context;
1578                                 if (c[0] == '-') {
1579                                         c++;
1580                                         ignore = true;
1581                                 } else
1582                                         ignore = false;
1583
1584                                 err = setexeccon(c);
1585                                 if (err < 0 && !ignore) {
1586                                         r = EXIT_SELINUX_CONTEXT;
1587                                         goto fail_child;
1588                                 }
1589                         }
1590 #endif
1591                 }
1592
1593                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1594                 if (r < 0) {
1595                         r = EXIT_MEMORY;
1596                         goto fail_child;
1597                 }
1598
1599                 final_env = strv_env_merge(5,
1600                                            environment,
1601                                            our_env,
1602                                            context->environment,
1603                                            files_env,
1604                                            pam_env,
1605                                            NULL);
1606                 if (!final_env) {
1607                         err = -ENOMEM;
1608                         r = EXIT_MEMORY;
1609                         goto fail_child;
1610                 }
1611
1612                 final_argv = replace_env_argv(argv, final_env);
1613                 if (!final_argv) {
1614                         err = -ENOMEM;
1615                         r = EXIT_MEMORY;
1616                         goto fail_child;
1617                 }
1618
1619                 final_env = strv_env_clean(final_env);
1620
1621                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1622                         line = exec_command_line(final_argv);
1623                         if (line) {
1624                                 log_open();
1625                                 log_struct_unit(LOG_DEBUG,
1626                                                 unit_id,
1627                                                 "EXECUTABLE=%s", command->path,
1628                                                 "MESSAGE=Executing: %s", line,
1629                                                 NULL);
1630                                 log_close();
1631                                 free(line);
1632                                 line = NULL;
1633                         }
1634                 }
1635                 execve(command->path, final_argv, final_env);
1636                 err = -errno;
1637                 r = EXIT_EXEC;
1638
1639         fail_child:
1640                 if (r != 0) {
1641                         log_open();
1642                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1643                                    "EXECUTABLE=%s", command->path,
1644                                    "MESSAGE=Failed at step %s spawning %s: %s",
1645                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1646                                           command->path, strerror(-err),
1647                                    "ERRNO=%d", -err,
1648                                    NULL);
1649                         log_close();
1650                 }
1651
1652                 _exit(r);
1653         }
1654
1655         log_struct_unit(LOG_DEBUG,
1656                         unit_id,
1657                         "MESSAGE=Forked %s as "PID_FMT,
1658                         command->path, pid,
1659                         NULL);
1660
1661         /* We add the new process to the cgroup both in the child (so
1662          * that we can be sure that no user code is ever executed
1663          * outside of the cgroup) and in the parent (so that we can be
1664          * sure that when we kill the cgroup the process will be
1665          * killed too). */
1666         if (cgroup_path)
1667                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1668
1669         exec_status_start(&command->exec_status, pid);
1670
1671         *ret = pid;
1672         return 0;
1673 }
1674
1675 void exec_context_init(ExecContext *c) {
1676         assert(c);
1677
1678         c->umask = 0022;
1679         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1680         c->cpu_sched_policy = SCHED_OTHER;
1681         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1682         c->syslog_level_prefix = true;
1683         c->ignore_sigpipe = true;
1684         c->timer_slack_nsec = (nsec_t) -1;
1685 }
1686
1687 void exec_context_done(ExecContext *c) {
1688         unsigned l;
1689
1690         assert(c);
1691
1692         strv_free(c->environment);
1693         c->environment = NULL;
1694
1695         strv_free(c->environment_files);
1696         c->environment_files = NULL;
1697
1698         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1699                 free(c->rlimit[l]);
1700                 c->rlimit[l] = NULL;
1701         }
1702
1703         free(c->working_directory);
1704         c->working_directory = NULL;
1705         free(c->root_directory);
1706         c->root_directory = NULL;
1707
1708         free(c->tty_path);
1709         c->tty_path = NULL;
1710
1711         free(c->tcpwrap_name);
1712         c->tcpwrap_name = NULL;
1713
1714         free(c->syslog_identifier);
1715         c->syslog_identifier = NULL;
1716
1717         free(c->user);
1718         c->user = NULL;
1719
1720         free(c->group);
1721         c->group = NULL;
1722
1723         strv_free(c->supplementary_groups);
1724         c->supplementary_groups = NULL;
1725
1726         free(c->pam_name);
1727         c->pam_name = NULL;
1728
1729         if (c->capabilities) {
1730                 cap_free(c->capabilities);
1731                 c->capabilities = NULL;
1732         }
1733
1734         strv_free(c->read_only_dirs);
1735         c->read_only_dirs = NULL;
1736
1737         strv_free(c->read_write_dirs);
1738         c->read_write_dirs = NULL;
1739
1740         strv_free(c->inaccessible_dirs);
1741         c->inaccessible_dirs = NULL;
1742
1743         if (c->cpuset)
1744                 CPU_FREE(c->cpuset);
1745
1746         free(c->utmp_id);
1747         c->utmp_id = NULL;
1748
1749         free(c->selinux_context);
1750         c->selinux_context = NULL;
1751
1752         free(c->syscall_filter);
1753         c->syscall_filter = NULL;
1754 }
1755
1756 void exec_command_done(ExecCommand *c) {
1757         assert(c);
1758
1759         free(c->path);
1760         c->path = NULL;
1761
1762         strv_free(c->argv);
1763         c->argv = NULL;
1764 }
1765
1766 void exec_command_done_array(ExecCommand *c, unsigned n) {
1767         unsigned i;
1768
1769         for (i = 0; i < n; i++)
1770                 exec_command_done(c+i);
1771 }
1772
1773 void exec_command_free_list(ExecCommand *c) {
1774         ExecCommand *i;
1775
1776         while ((i = c)) {
1777                 LIST_REMOVE(command, c, i);
1778                 exec_command_done(i);
1779                 free(i);
1780         }
1781 }
1782
1783 void exec_command_free_array(ExecCommand **c, unsigned n) {
1784         unsigned i;
1785
1786         for (i = 0; i < n; i++) {
1787                 exec_command_free_list(c[i]);
1788                 c[i] = NULL;
1789         }
1790 }
1791
1792 int exec_context_load_environment(const ExecContext *c, char ***l) {
1793         char **i, **r = NULL;
1794
1795         assert(c);
1796         assert(l);
1797
1798         STRV_FOREACH(i, c->environment_files) {
1799                 char *fn;
1800                 int k;
1801                 bool ignore = false;
1802                 char **p;
1803                 _cleanup_globfree_ glob_t pglob = {};
1804                 int count, n;
1805
1806                 fn = *i;
1807
1808                 if (fn[0] == '-') {
1809                         ignore = true;
1810                         fn ++;
1811                 }
1812
1813                 if (!path_is_absolute(fn)) {
1814                         if (ignore)
1815                                 continue;
1816
1817                         strv_free(r);
1818                         return -EINVAL;
1819                 }
1820
1821                 /* Filename supports globbing, take all matching files */
1822                 errno = 0;
1823                 if (glob(fn, 0, NULL, &pglob) != 0) {
1824                         if (ignore)
1825                                 continue;
1826
1827                         strv_free(r);
1828                         return errno ? -errno : -EINVAL;
1829                 }
1830                 count = pglob.gl_pathc;
1831                 if (count == 0) {
1832                         if (ignore)
1833                                 continue;
1834
1835                         strv_free(r);
1836                         return -EINVAL;
1837                 }
1838                 for (n = 0; n < count; n++) {
1839                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1840                         if (k < 0) {
1841                                 if (ignore)
1842                                         continue;
1843
1844                                 strv_free(r);
1845                                 return k;
1846                         }
1847                         /* Log invalid environment variables with filename */
1848                         if (p)
1849                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1850
1851                         if (r == NULL)
1852                                 r = p;
1853                         else {
1854                                 char **m;
1855
1856                                 m = strv_env_merge(2, r, p);
1857                                 strv_free(r);
1858                                 strv_free(p);
1859                                 if (!m)
1860                                         return -ENOMEM;
1861
1862                                 r = m;
1863                         }
1864                 }
1865         }
1866
1867         *l = r;
1868
1869         return 0;
1870 }
1871
/* Returns true if the given TTY (with or without a "/dev/" prefix)
 * may refer to the same device as /dev/console. Errs on the side of
 * "yes" when the console cannot be resolved. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        /* Compare bare device names only */
        if (startswith(tty, "/dev/"))
                tty += sizeof("/dev/") - 1;

        /* trivial identity? */
        if (streq(tty, "console"))
                return true;

        /* if we could not resolve, assume it may */
        console = resolve_dev_console(&active);
        if (!console)
                return true;

        if (streq(console, tty))
                match = true;
        else
                /* "tty0" means the active VC, so it may be the same sometimes */
                match = streq(console, "tty0") && tty_is_vc(tty);

        /* Released only after the comparisons: console presumably
         * points into this buffer -- confirm against
         * resolve_dev_console() */
        free(active);

        return match;
}
1894
1895 bool exec_context_may_touch_console(ExecContext *ec) {
1896         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1897                 is_terminal_input(ec->std_input) ||
1898                 is_terminal_output(ec->std_output) ||
1899                 is_terminal_output(ec->std_error)) &&
1900                tty_may_match_dev_console(tty_path(ec));
1901 }
1902
1903 static void strv_fprintf(FILE *f, char **l) {
1904         char **g;
1905
1906         assert(f);
1907
1908         STRV_FOREACH(g, l)
1909                 fprintf(f, " %s", *g);
1910 }
1911
1912 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1913         char **e;
1914         unsigned i;
1915
1916         assert(c);
1917         assert(f);
1918
1919         prefix = strempty(prefix);
1920
1921         fprintf(f,
1922                 "%sUMask: %04o\n"
1923                 "%sWorkingDirectory: %s\n"
1924                 "%sRootDirectory: %s\n"
1925                 "%sNonBlocking: %s\n"
1926                 "%sPrivateTmp: %s\n"
1927                 "%sPrivateNetwork: %s\n"
1928                 "%sPrivateDevices: %s\n"
1929                 "%sIgnoreSIGPIPE: %s\n",
1930                 prefix, c->umask,
1931                 prefix, c->working_directory ? c->working_directory : "/",
1932                 prefix, c->root_directory ? c->root_directory : "/",
1933                 prefix, yes_no(c->non_blocking),
1934                 prefix, yes_no(c->private_tmp),
1935                 prefix, yes_no(c->private_network),
1936                 prefix, yes_no(c->private_devices),
1937                 prefix, yes_no(c->ignore_sigpipe));
1938
1939         STRV_FOREACH(e, c->environment)
1940                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1941
1942         STRV_FOREACH(e, c->environment_files)
1943                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1944
1945         if (c->tcpwrap_name)
1946                 fprintf(f,
1947                         "%sTCPWrapName: %s\n",
1948                         prefix, c->tcpwrap_name);
1949
1950         if (c->nice_set)
1951                 fprintf(f,
1952                         "%sNice: %i\n",
1953                         prefix, c->nice);
1954
1955         if (c->oom_score_adjust_set)
1956                 fprintf(f,
1957                         "%sOOMScoreAdjust: %i\n",
1958                         prefix, c->oom_score_adjust);
1959
1960         for (i = 0; i < RLIM_NLIMITS; i++)
1961                 if (c->rlimit[i])
1962                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1963
1964         if (c->ioprio_set) {
1965                 char *class_str;
1966                 int r;
1967
1968                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1969                 if (r < 0)
1970                         class_str = NULL;
1971                 fprintf(f,
1972                         "%sIOSchedulingClass: %s\n"
1973                         "%sIOPriority: %i\n",
1974                         prefix, strna(class_str),
1975                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1976                 free(class_str);
1977         }
1978
1979         if (c->cpu_sched_set) {
1980                 char *policy_str;
1981                 int r;
1982
1983                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1984                 if (r < 0)
1985                         policy_str = NULL;
1986                 fprintf(f,
1987                         "%sCPUSchedulingPolicy: %s\n"
1988                         "%sCPUSchedulingPriority: %i\n"
1989                         "%sCPUSchedulingResetOnFork: %s\n",
1990                         prefix, strna(policy_str),
1991                         prefix, c->cpu_sched_priority,
1992                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1993                 free(policy_str);
1994         }
1995
1996         if (c->cpuset) {
1997                 fprintf(f, "%sCPUAffinity:", prefix);
1998                 for (i = 0; i < c->cpuset_ncpus; i++)
1999                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2000                                 fprintf(f, " %u", i);
2001                 fputs("\n", f);
2002         }
2003
2004         if (c->timer_slack_nsec != (nsec_t) -1)
2005                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2006
2007         fprintf(f,
2008                 "%sStandardInput: %s\n"
2009                 "%sStandardOutput: %s\n"
2010                 "%sStandardError: %s\n",
2011                 prefix, exec_input_to_string(c->std_input),
2012                 prefix, exec_output_to_string(c->std_output),
2013                 prefix, exec_output_to_string(c->std_error));
2014
2015         if (c->tty_path)
2016                 fprintf(f,
2017                         "%sTTYPath: %s\n"
2018                         "%sTTYReset: %s\n"
2019                         "%sTTYVHangup: %s\n"
2020                         "%sTTYVTDisallocate: %s\n",
2021                         prefix, c->tty_path,
2022                         prefix, yes_no(c->tty_reset),
2023                         prefix, yes_no(c->tty_vhangup),
2024                         prefix, yes_no(c->tty_vt_disallocate));
2025
2026         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2027             c->std_output == EXEC_OUTPUT_KMSG ||
2028             c->std_output == EXEC_OUTPUT_JOURNAL ||
2029             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2030             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2031             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2032             c->std_error == EXEC_OUTPUT_SYSLOG ||
2033             c->std_error == EXEC_OUTPUT_KMSG ||
2034             c->std_error == EXEC_OUTPUT_JOURNAL ||
2035             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2036             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2037             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2038
2039                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2040
2041                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2042                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2043
2044                 fprintf(f,
2045                         "%sSyslogFacility: %s\n"
2046                         "%sSyslogLevel: %s\n",
2047                         prefix, strna(fac_str),
2048                         prefix, strna(lvl_str));
2049         }
2050
2051         if (c->capabilities) {
2052                 _cleanup_cap_free_charp_ char *t;
2053
2054                 t = cap_to_text(c->capabilities, NULL);
2055                 if (t)
2056                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2057         }
2058
2059         if (c->secure_bits)
2060                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2061                         prefix,
2062                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2063                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2064                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2065                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2066                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2067                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2068
2069         if (c->capability_bounding_set_drop) {
2070                 unsigned long l;
2071                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2072
2073                 for (l = 0; l <= cap_last_cap(); l++)
2074                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2075                                 _cleanup_cap_free_charp_ char *t;
2076
2077                                 t = cap_to_name(l);
2078                                 if (t)
2079                                         fprintf(f, " %s", t);
2080                         }
2081
2082                 fputs("\n", f);
2083         }
2084
2085         if (c->user)
2086                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2087         if (c->group)
2088                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2089
2090         if (strv_length(c->supplementary_groups) > 0) {
2091                 fprintf(f, "%sSupplementaryGroups:", prefix);
2092                 strv_fprintf(f, c->supplementary_groups);
2093                 fputs("\n", f);
2094         }
2095
2096         if (c->pam_name)
2097                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2098
2099         if (strv_length(c->read_write_dirs) > 0) {
2100                 fprintf(f, "%sReadWriteDirs:", prefix);
2101                 strv_fprintf(f, c->read_write_dirs);
2102                 fputs("\n", f);
2103         }
2104
2105         if (strv_length(c->read_only_dirs) > 0) {
2106                 fprintf(f, "%sReadOnlyDirs:", prefix);
2107                 strv_fprintf(f, c->read_only_dirs);
2108                 fputs("\n", f);
2109         }
2110
2111         if (strv_length(c->inaccessible_dirs) > 0) {
2112                 fprintf(f, "%sInaccessibleDirs:", prefix);
2113                 strv_fprintf(f, c->inaccessible_dirs);
2114                 fputs("\n", f);
2115         }
2116
2117         if (c->utmp_id)
2118                 fprintf(f,
2119                         "%sUtmpIdentifier: %s\n",
2120                         prefix, c->utmp_id);
2121
2122         if (c->selinux_context)
2123                 fprintf(f,
2124                         "%sSELinuxContext: %s\n",
2125                         prefix, c->selinux_context);
2126 }
2127
2128 void exec_status_start(ExecStatus *s, pid_t pid) {
2129         assert(s);
2130
2131         zero(*s);
2132         s->pid = pid;
2133         dual_timestamp_get(&s->start_timestamp);
2134 }
2135
2136 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2137         assert(s);
2138
2139         if (s->pid && s->pid != pid)
2140                 zero(*s);
2141
2142         s->pid = pid;
2143         dual_timestamp_get(&s->exit_timestamp);
2144
2145         s->code = code;
2146         s->status = status;
2147
2148         if (context) {
2149                 if (context->utmp_id)
2150                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2151
2152                 exec_context_tty_reset(context);
2153         }
2154 }
2155
2156 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2157         char buf[FORMAT_TIMESTAMP_MAX];
2158
2159         assert(s);
2160         assert(f);
2161
2162         if (!prefix)
2163                 prefix = "";
2164
2165         if (s->pid <= 0)
2166                 return;
2167
2168         fprintf(f,
2169                 "%sPID: "PID_FMT"\n",
2170                 prefix, s->pid);
2171
2172         if (s->start_timestamp.realtime > 0)
2173                 fprintf(f,
2174                         "%sStart Timestamp: %s\n",
2175                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2176
2177         if (s->exit_timestamp.realtime > 0)
2178                 fprintf(f,
2179                         "%sExit Timestamp: %s\n"
2180                         "%sExit Code: %s\n"
2181                         "%sExit Status: %i\n",
2182                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2183                         prefix, sigchld_code_to_string(s->code),
2184                         prefix, s->status);
2185 }
2186
2187 char *exec_command_line(char **argv) {
2188         size_t k;
2189         char *n, *p, **a;
2190         bool first = true;
2191
2192         assert(argv);
2193
2194         k = 1;
2195         STRV_FOREACH(a, argv)
2196                 k += strlen(*a)+3;
2197
2198         if (!(n = new(char, k)))
2199                 return NULL;
2200
2201         p = n;
2202         STRV_FOREACH(a, argv) {
2203
2204                 if (!first)
2205                         *(p++) = ' ';
2206                 else
2207                         first = false;
2208
2209                 if (strpbrk(*a, WHITESPACE)) {
2210                         *(p++) = '\'';
2211                         p = stpcpy(p, *a);
2212                         *(p++) = '\'';
2213                 } else
2214                         p = stpcpy(p, *a);
2215
2216         }
2217
2218         *p = 0;
2219
2220         /* FIXME: this doesn't really handle arguments that have
2221          * spaces and ticks in them */
2222
2223         return n;
2224 }
2225
2226 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2227         char *p2;
2228         const char *prefix2;
2229
2230         char *cmd;
2231
2232         assert(c);
2233         assert(f);
2234
2235         if (!prefix)
2236                 prefix = "";
2237         p2 = strappend(prefix, "\t");
2238         prefix2 = p2 ? p2 : prefix;
2239
2240         cmd = exec_command_line(c->argv);
2241
2242         fprintf(f,
2243                 "%sCommand Line: %s\n",
2244                 prefix, cmd ? cmd : strerror(ENOMEM));
2245
2246         free(cmd);
2247
2248         exec_status_dump(&c->exec_status, f, prefix2);
2249
2250         free(p2);
2251 }
2252
2253 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2254         assert(f);
2255
2256         if (!prefix)
2257                 prefix = "";
2258
2259         LIST_FOREACH(command, c, c)
2260                 exec_command_dump(c, f, prefix);
2261 }
2262
2263 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2264         ExecCommand *end;
2265
2266         assert(l);
2267         assert(e);
2268
2269         if (*l) {
2270                 /* It's kind of important, that we keep the order here */
2271                 LIST_FIND_TAIL(command, *l, end);
2272                 LIST_INSERT_AFTER(command, *l, end, e);
2273         } else
2274               *l = e;
2275 }
2276
2277 int exec_command_set(ExecCommand *c, const char *path, ...) {
2278         va_list ap;
2279         char **l, *p;
2280
2281         assert(c);
2282         assert(path);
2283
2284         va_start(ap, path);
2285         l = strv_new_ap(path, ap);
2286         va_end(ap);
2287
2288         if (!l)
2289                 return -ENOMEM;
2290
2291         p = strdup(path);
2292         if (!p) {
2293                 strv_free(l);
2294                 return -ENOMEM;
2295         }
2296
2297         free(c->path);
2298         c->path = p;
2299
2300         strv_free(c->argv);
2301         c->argv = l;
2302
2303         return 0;
2304 }
2305
2306 static int exec_runtime_allocate(ExecRuntime **rt) {
2307
2308         if (*rt)
2309                 return 0;
2310
2311         *rt = new0(ExecRuntime, 1);
2312         if (!*rt)
2313                 return -ENOMEM;
2314
2315         (*rt)->n_ref = 1;
2316         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2317
2318         return 0;
2319 }
2320
2321 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2322         int r;
2323
2324         assert(rt);
2325         assert(c);
2326         assert(id);
2327
2328         if (*rt)
2329                 return 1;
2330
2331         if (!c->private_network && !c->private_tmp)
2332                 return 0;
2333
2334         r = exec_runtime_allocate(rt);
2335         if (r < 0)
2336                 return r;
2337
2338         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2339                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2340                         return -errno;
2341         }
2342
2343         if (c->private_tmp && !(*rt)->tmp_dir) {
2344                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2345                 if (r < 0)
2346                         return r;
2347         }
2348
2349         return 1;
2350 }
2351
2352 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2353         assert(r);
2354         assert(r->n_ref > 0);
2355
2356         r->n_ref++;
2357         return r;
2358 }
2359
2360 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2361
2362         if (!r)
2363                 return NULL;
2364
2365         assert(r->n_ref > 0);
2366
2367         r->n_ref--;
2368         if (r->n_ref <= 0) {
2369                 free(r->tmp_dir);
2370                 free(r->var_tmp_dir);
2371                 close_pipe(r->netns_storage_socket);
2372                 free(r);
2373         }
2374
2375         return NULL;
2376 }
2377
2378 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2379         assert(u);
2380         assert(f);
2381         assert(fds);
2382
2383         if (!rt)
2384                 return 0;
2385
2386         if (rt->tmp_dir)
2387                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2388
2389         if (rt->var_tmp_dir)
2390                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2391
2392         if (rt->netns_storage_socket[0] >= 0) {
2393                 int copy;
2394
2395                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2396                 if (copy < 0)
2397                         return copy;
2398
2399                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2400         }
2401
2402         if (rt->netns_storage_socket[1] >= 0) {
2403                 int copy;
2404
2405                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2406                 if (copy < 0)
2407                         return copy;
2408
2409                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2410         }
2411
2412         return 0;
2413 }
2414
2415 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2416         int r;
2417
2418         assert(rt);
2419         assert(key);
2420         assert(value);
2421
2422         if (streq(key, "tmp-dir")) {
2423                 char *copy;
2424
2425                 r = exec_runtime_allocate(rt);
2426                 if (r < 0)
2427                         return r;
2428
2429                 copy = strdup(value);
2430                 if (!copy)
2431                         return log_oom();
2432
2433                 free((*rt)->tmp_dir);
2434                 (*rt)->tmp_dir = copy;
2435
2436         } else if (streq(key, "var-tmp-dir")) {
2437                 char *copy;
2438
2439                 r = exec_runtime_allocate(rt);
2440                 if (r < 0)
2441                         return r;
2442
2443                 copy = strdup(value);
2444                 if (!copy)
2445                         return log_oom();
2446
2447                 free((*rt)->var_tmp_dir);
2448                 (*rt)->var_tmp_dir = copy;
2449
2450         } else if (streq(key, "netns-socket-0")) {
2451                 int fd;
2452
2453                 r = exec_runtime_allocate(rt);
2454                 if (r < 0)
2455                         return r;
2456
2457                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2458                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2459                 else {
2460                         if ((*rt)->netns_storage_socket[0] >= 0)
2461                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2462
2463                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2464                 }
2465         } else if (streq(key, "netns-socket-1")) {
2466                 int fd;
2467
2468                 r = exec_runtime_allocate(rt);
2469                 if (r < 0)
2470                         return r;
2471
2472                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2473                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2474                 else {
2475                         if ((*rt)->netns_storage_socket[1] >= 0)
2476                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2477
2478                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2479                 }
2480         } else
2481                 return 0;
2482
2483         return 1;
2484 }
2485
/* Job function handed to asynchronous_job(): removes the directory
 * tree named by p with rm_rf_dangerous() and then frees the path
 * string, which this function owns. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2492
2493 void exec_runtime_destroy(ExecRuntime *rt) {
2494         if (!rt)
2495                 return;
2496
2497         /* If there are multiple users of this, let's leave the stuff around */
2498         if (rt->n_ref > 1)
2499                 return;
2500
2501         if (rt->tmp_dir) {
2502                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2503                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2504                 rt->tmp_dir = NULL;
2505         }
2506
2507         if (rt->var_tmp_dir) {
2508                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2509                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2510                 rt->var_tmp_dir = NULL;
2511         }
2512
2513         close_pipe(rt->netns_storage_socket);
2514 }
2515
2516 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2517         [EXEC_INPUT_NULL] = "null",
2518         [EXEC_INPUT_TTY] = "tty",
2519         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2520         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2521         [EXEC_INPUT_SOCKET] = "socket"
2522 };
2523
2524 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2525
2526 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2527         [EXEC_OUTPUT_INHERIT] = "inherit",
2528         [EXEC_OUTPUT_NULL] = "null",
2529         [EXEC_OUTPUT_TTY] = "tty",
2530         [EXEC_OUTPUT_SYSLOG] = "syslog",
2531         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2532         [EXEC_OUTPUT_KMSG] = "kmsg",
2533         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2534         [EXEC_OUTPUT_JOURNAL] = "journal",
2535         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2536         [EXEC_OUTPUT_SOCKET] = "socket"
2537 };
2538
2539 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);