chiark / gitweb /
syscallfilter: port to libseccomp
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <libgen.h>
43 #ifdef HAVE_SECCOMP
44 #include <seccomp.h>
45
46 #include "set.h"
47 #endif
48 #undef basename
49
50 #ifdef HAVE_PAM
51 #include <security/pam_appl.h>
52 #endif
53
54 #ifdef HAVE_SELINUX
55 #include <selinux/selinux.h>
56 #endif
57
58 #include "execute.h"
59 #include "strv.h"
60 #include "macro.h"
61 #include "capability.h"
62 #include "util.h"
63 #include "log.h"
64 #include "sd-messages.h"
65 #include "ioprio.h"
66 #include "securebits.h"
67 #include "namespace.h"
68 #include "tcpwrap.h"
69 #include "exit-status.h"
70 #include "missing.h"
71 #include "utmp-wtmp.h"
72 #include "def.h"
73 #include "path-util.h"
74 #include "env-util.h"
75 #include "fileio.h"
76 #include "unit.h"
77 #include "async.h"
78 #include "selinux-util.h"
79
80 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
81 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
82
83 /* This assumes there is a 'tty' group */
84 #define TTY_MODE 0620
85
86 #define SNDBUF_SIZE (8*1024*1024)
87
/* Renumbers the passed descriptors so that fds[i] ends up being i+3 for
 * every index. The array is modified in place. Returns 0 on success or a
 * negative errno value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands back the lowest free fd >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* The wanted number was still taken. Remember the
                         * first such index and make another pass from there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
131
/* Applies the requested O_NONBLOCK state to every passed descriptor and
 * clears FD_CLOEXEC on each of them. Returns 0 on success, or the first
 * negative value reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* These fds are meant to be passed on to our children, hence
                 * close-on-exec is always turned off. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
158
159 _pure_ static const char *tty_path(const ExecContext *context) {
160         assert(context);
161
162         if (context->tty_path)
163                 return context->tty_path;
164
165         return "/dev/console";
166 }
167
168 static void exec_context_tty_reset(const ExecContext *context) {
169         assert(context);
170
171         if (context->tty_vhangup)
172                 terminal_vhangup(tty_path(context));
173
174         if (context->tty_reset)
175                 reset_terminal(tty_path(context));
176
177         if (context->tty_vt_disallocate && context->tty_path)
178                 vt_disallocate(context->tty_path);
179 }
180
181 static bool is_terminal_output(ExecOutput o) {
182         return
183                 o == EXEC_OUTPUT_TTY ||
184                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
185                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
186                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
187 }
188
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * value on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
206
207 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
208         int fd, r;
209         union sockaddr_union sa = {
210                 .un.sun_family = AF_UNIX,
211                 .un.sun_path = "/run/systemd/journal/stdout",
212         };
213
214         assert(context);
215         assert(output < _EXEC_OUTPUT_MAX);
216         assert(ident);
217         assert(nfd >= 0);
218
219         fd = socket(AF_UNIX, SOCK_STREAM, 0);
220         if (fd < 0)
221                 return -errno;
222
223         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
224         if (r < 0) {
225                 close_nointr_nofail(fd);
226                 return -errno;
227         }
228
229         if (shutdown(fd, SHUT_RD) < 0) {
230                 close_nointr_nofail(fd);
231                 return -errno;
232         }
233
234         fd_inc_sndbuf(fd, SNDBUF_SIZE);
235
236         dprintf(fd,
237                 "%s\n"
238                 "%s\n"
239                 "%i\n"
240                 "%i\n"
241                 "%i\n"
242                 "%i\n"
243                 "%i\n",
244                 context->syslog_identifier ? context->syslog_identifier : ident,
245                 unit_id,
246                 context->syslog_priority,
247                 !!context->syslog_level_prefix,
248                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
249                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
250                 is_terminal_output(output));
251
252         if (fd != nfd) {
253                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
254                 close_nointr_nofail(fd);
255         } else
256                 r = nfd;
257
258         return r;
259 }
/* Opens the terminal at path via open_terminal() (mode plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, or a
 * negative value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        /* Already at the requested number, nothing to move */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
277
278 static bool is_terminal_input(ExecInput i) {
279         return
280                 i == EXEC_INPUT_TTY ||
281                 i == EXEC_INPUT_TTY_FORCE ||
282                 i == EXEC_INPUT_TTY_FAIL;
283 }
284
285 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
286
287         if (is_terminal_input(std_input) && !apply_tty_stdin)
288                 return EXEC_INPUT_NULL;
289
290         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
291                 return EXEC_INPUT_NULL;
292
293         return std_input;
294 }
295
296 static int fixup_output(ExecOutput std_output, int socket_fd) {
297
298         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
299                 return EXEC_OUTPUT_INHERIT;
300
301         return std_output;
302 }
303
304 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
305         ExecInput i;
306
307         assert(context);
308
309         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
310
311         switch (i) {
312
313         case EXEC_INPUT_NULL:
314                 return open_null_as(O_RDONLY, STDIN_FILENO);
315
316         case EXEC_INPUT_TTY:
317         case EXEC_INPUT_TTY_FORCE:
318         case EXEC_INPUT_TTY_FAIL: {
319                 int fd, r;
320
321                 fd = acquire_terminal(tty_path(context),
322                                       i == EXEC_INPUT_TTY_FAIL,
323                                       i == EXEC_INPUT_TTY_FORCE,
324                                       false,
325                                       (usec_t) -1);
326                 if (fd < 0)
327                         return fd;
328
329                 if (fd != STDIN_FILENO) {
330                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
331                         close_nointr_nofail(fd);
332                 } else
333                         r = STDIN_FILENO;
334
335                 return r;
336         }
337
338         case EXEC_INPUT_SOCKET:
339                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
340
341         default:
342                 assert_not_reached("Unknown input type");
343         }
344 }
345
346 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
347         ExecOutput o;
348         ExecInput i;
349         int r;
350
351         assert(context);
352         assert(ident);
353
354         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
355         o = fixup_output(context->std_output, socket_fd);
356
357         if (fileno == STDERR_FILENO) {
358                 ExecOutput e;
359                 e = fixup_output(context->std_error, socket_fd);
360
361                 /* This expects the input and output are already set up */
362
363                 /* Don't change the stderr file descriptor if we inherit all
364                  * the way and are not on a tty */
365                 if (e == EXEC_OUTPUT_INHERIT &&
366                     o == EXEC_OUTPUT_INHERIT &&
367                     i == EXEC_INPUT_NULL &&
368                     !is_terminal_input(context->std_input) &&
369                     getppid () != 1)
370                         return fileno;
371
372                 /* Duplicate from stdout if possible */
373                 if (e == o || e == EXEC_OUTPUT_INHERIT)
374                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
375
376                 o = e;
377
378         } else if (o == EXEC_OUTPUT_INHERIT) {
379                 /* If input got downgraded, inherit the original value */
380                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
381                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
382
383                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
384                 if (i != EXEC_INPUT_NULL)
385                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
388                 if (getppid() != 1)
389                         return fileno;
390
391                 /* We need to open /dev/null here anew, to get the right access mode. */
392                 return open_null_as(O_WRONLY, fileno);
393         }
394
395         switch (o) {
396
397         case EXEC_OUTPUT_NULL:
398                 return open_null_as(O_WRONLY, fileno);
399
400         case EXEC_OUTPUT_TTY:
401                 if (is_terminal_input(i))
402                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
403
404                 /* We don't reset the terminal if this is just about output */
405                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
406
407         case EXEC_OUTPUT_SYSLOG:
408         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
409         case EXEC_OUTPUT_KMSG:
410         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
411         case EXEC_OUTPUT_JOURNAL:
412         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
413                 r = connect_logger_as(context, o, ident, unit_id, fileno);
414                 if (r < 0) {
415                         log_struct_unit(LOG_CRIT, unit_id,
416                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
417                                 fileno == STDOUT_FILENO ? "out" : "err",
418                                 unit_id, strerror(-r),
419                                 "ERRNO=%d", -r,
420                                 NULL);
421                         r = open_null_as(O_WRONLY, fileno);
422                 }
423                 return r;
424
425         case EXEC_OUTPUT_SOCKET:
426                 assert(socket_fd >= 0);
427                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
428
429         default:
430                 assert_not_reached("Unknown error type");
431         }
432 }
433
434 static int chown_terminal(int fd, uid_t uid) {
435         struct stat st;
436
437         assert(fd >= 0);
438
439         /* This might fail. What matters are the results. */
440         (void) fchown(fd, uid, -1);
441         (void) fchmod(fd, TTY_MODE);
442
443         if (fstat(fd, &st) < 0)
444                 return -errno;
445
446         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
447                 return -EPERM;
448
449         return 0;
450 }
451
452 static int setup_confirm_stdio(int *_saved_stdin,
453                                int *_saved_stdout) {
454         int fd = -1, saved_stdin, saved_stdout = -1, r;
455
456         assert(_saved_stdin);
457         assert(_saved_stdout);
458
459         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460         if (saved_stdin < 0)
461                 return -errno;
462
463         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
464         if (saved_stdout < 0) {
465                 r = errno;
466                 goto fail;
467         }
468
469         fd = acquire_terminal(
470                         "/dev/console",
471                         false,
472                         false,
473                         false,
474                         DEFAULT_CONFIRM_USEC);
475         if (fd < 0) {
476                 r = fd;
477                 goto fail;
478         }
479
480         r = chown_terminal(fd, getuid());
481         if (r < 0)
482                 goto fail;
483
484         if (dup2(fd, STDIN_FILENO) < 0) {
485                 r = -errno;
486                 goto fail;
487         }
488
489         if (dup2(fd, STDOUT_FILENO) < 0) {
490                 r = -errno;
491                 goto fail;
492         }
493
494         if (fd >= 2)
495                 close_nointr_nofail(fd);
496
497         *_saved_stdin = saved_stdin;
498         *_saved_stdout = saved_stdout;
499
500         return 0;
501
502 fail:
503         if (saved_stdout >= 0)
504                 close_nointr_nofail(saved_stdout);
505
506         if (saved_stdin >= 0)
507                 close_nointr_nofail(saved_stdin);
508
509         if (fd >= 0)
510                 close_nointr_nofail(fd);
511
512         return r;
513 }
514
515 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
516         int fd;
517         va_list ap;
518
519         assert(format);
520
521         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522         if (fd < 0)
523                 return fd;
524
525         va_start(ap, format);
526         vdprintf(fd, format, ap);
527         va_end(ap);
528
529         close_nointr_nofail(fd);
530
531         return 0;
532 }
533
/* Releases the terminal and puts the saved descriptors back in place as
 * stdin/stdout, closing the saved copies afterwards. Negative saved values
 * are skipped. Returns 0, or the negative errno value of the last dup2()
 * that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int in_fd, out_fd, r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        release_terminal();

        /* First restore both, then drop the spare copies */
        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                r = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                r = -errno;

        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return r;
}
560
/* Asks on /dev/console whether the command line in argv shall be executed.
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored before returning, including when building the
 * command line string fails. Returns the result of ask() (which is handed
 * `response` and the accepted replies "yns"), or a negative value if the
 * console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Do not leave stdio pointing at the console, and do not leak
                 * the saved descriptors. */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 _cleanup_cap_free_ cap_t d = NULL;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, while we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 d = cap_dup(context->capabilities);
688                 if (!d)
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
693                         return -errno;
694
695                 if (cap_set_proc(d) < 0)
696                         return -errno;
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the process to "(<name>)", where <name> is the basename of path
 * cut down to its last 8 characters, so that the result fits in 10
 * characters (the length of "/sbin/init") and looks pretty in /bin/ps.
 * An empty basename yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Keep the tail: the end of the name is usually the more telling
         * part, since the beginning might just be "systemd-". */
        len = strlen(base);
        if (len > 8)
                base += len - 8;

        /* '(' + at most 8 characters + ')' + NUL is exactly 11 bytes */
        snprintf(process_name, sizeof(process_name), "(%s)", base);

        rename_process(process_name);
}
938
939 #ifdef HAVE_SECCOMP
940 static int apply_seccomp(ExecContext *c) {
941         uint32_t action = SCMP_ACT_ALLOW;
942         Iterator i;
943         void *id;
944
945         assert(c);
946
947         c->syscall_filter = seccomp_init(c->syscall_filter_default_action);
948         if (!c->syscall_filter)
949                 return -1;
950
951         if (c->syscall_filter_default_action == SCMP_ACT_ALLOW)
952                 action = SCMP_ACT_KILL;
953
954         SET_FOREACH(id, c->filtered_syscalls, i) {
955                 int r = seccomp_rule_add(c->syscall_filter, action, PTR_TO_INT(id) - 1, 0);
956                 if (r < 0) {
957                         log_error("Failed to add syscall filter");
958                         return r;
959                 }
960         }
961
962         return seccomp_load(c->syscall_filter);
963 }
964 #endif
965
966 static void do_idle_pipe_dance(int idle_pipe[4]) {
967         assert(idle_pipe);
968
969         if (idle_pipe[1] >= 0)
970                 close_nointr_nofail(idle_pipe[1]);
971         if (idle_pipe[2] >= 0)
972                 close_nointr_nofail(idle_pipe[2]);
973
974         if (idle_pipe[0] >= 0) {
975                 int r;
976
977                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
978
979                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
980                         /* Signal systemd that we are bored and want to continue. */
981                         write(idle_pipe[3], "x", 1);
982
983                         /* Wait for systemd to react to the signal above. */
984                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
985                 }
986
987                 close_nointr_nofail(idle_pipe[0]);
988
989         }
990
991         if (idle_pipe[3] >= 0)
992                 close_nointr_nofail(idle_pipe[3]);
993 }
994
995 static int build_environment(
996                 ExecContext *c,
997                 unsigned n_fds,
998                 usec_t watchdog_usec,
999                 const char *home,
1000                 const char *username,
1001                 const char *shell,
1002                 char ***ret) {
1003
1004         _cleanup_strv_free_ char **our_env = NULL;
1005         unsigned n_env = 0;
1006         char *x;
1007
1008         assert(c);
1009         assert(ret);
1010
1011         our_env = new0(char*, 10);
1012         if (!our_env)
1013                 return -ENOMEM;
1014
1015         if (n_fds > 0) {
1016                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1017                         return -ENOMEM;
1018                 our_env[n_env++] = x;
1019
1020                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1021                         return -ENOMEM;
1022                 our_env[n_env++] = x;
1023         }
1024
1025         if (watchdog_usec > 0) {
1026                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1027                         return -ENOMEM;
1028                 our_env[n_env++] = x;
1029
1030                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1031                         return -ENOMEM;
1032                 our_env[n_env++] = x;
1033         }
1034
1035         if (home) {
1036                 x = strappend("HOME=", home);
1037                 if (!x)
1038                         return -ENOMEM;
1039                 our_env[n_env++] = x;
1040         }
1041
1042         if (username) {
1043                 x = strappend("LOGNAME=", username);
1044                 if (!x)
1045                         return -ENOMEM;
1046                 our_env[n_env++] = x;
1047
1048                 x = strappend("USER=", username);
1049                 if (!x)
1050                         return -ENOMEM;
1051                 our_env[n_env++] = x;
1052         }
1053
1054         if (shell) {
1055                 x = strappend("SHELL=", shell);
1056                 if (!x)
1057                         return -ENOMEM;
1058                 our_env[n_env++] = x;
1059         }
1060
1061         if (is_terminal_input(c->std_input) ||
1062             c->std_output == EXEC_OUTPUT_TTY ||
1063             c->std_error == EXEC_OUTPUT_TTY ||
1064             c->tty_path) {
1065
1066                 x = strdup(default_term_for_tty(tty_path(c)));
1067                 if (!x)
1068                         return -ENOMEM;
1069                 our_env[n_env++] = x;
1070         }
1071
1072         our_env[n_env++] = NULL;
1073         assert(n_env <= 10);
1074
1075         *ret = our_env;
1076         our_env = NULL;
1077
1078         return 0;
1079 }
1080
1081 int exec_spawn(ExecCommand *command,
1082                char **argv,
1083                ExecContext *context,
1084                int fds[], unsigned n_fds,
1085                char **environment,
1086                bool apply_permissions,
1087                bool apply_chroot,
1088                bool apply_tty_stdin,
1089                bool confirm_spawn,
1090                CGroupControllerMask cgroup_supported,
1091                const char *cgroup_path,
1092                const char *unit_id,
1093                usec_t watchdog_usec,
1094                int idle_pipe[4],
1095                ExecRuntime *runtime,
1096                pid_t *ret) {
1097
1098         _cleanup_strv_free_ char **files_env = NULL;
1099         int socket_fd;
1100         char *line;
1101         pid_t pid;
1102         int r;
1103
1104         assert(command);
1105         assert(context);
1106         assert(ret);
1107         assert(fds || n_fds <= 0);
1108
1109         if (context->std_input == EXEC_INPUT_SOCKET ||
1110             context->std_output == EXEC_OUTPUT_SOCKET ||
1111             context->std_error == EXEC_OUTPUT_SOCKET) {
1112
1113                 if (n_fds != 1)
1114                         return -EINVAL;
1115
1116                 socket_fd = fds[0];
1117
1118                 fds = NULL;
1119                 n_fds = 0;
1120         } else
1121                 socket_fd = -1;
1122
1123         r = exec_context_load_environment(context, &files_env);
1124         if (r < 0) {
1125                 log_struct_unit(LOG_ERR,
1126                            unit_id,
1127                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1128                            "ERRNO=%d", -r,
1129                            NULL);
1130                 return r;
1131         }
1132
1133         if (!argv)
1134                 argv = command->argv;
1135
1136         line = exec_command_line(argv);
1137         if (!line)
1138                 return log_oom();
1139
1140         log_struct_unit(LOG_DEBUG,
1141                         unit_id,
1142                         "EXECUTABLE=%s", command->path,
1143                         "MESSAGE=About to execute: %s", line,
1144                         NULL);
1145         free(line);
1146
1147         pid = fork();
1148         if (pid < 0)
1149                 return -errno;
1150
1151         if (pid == 0) {
1152                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1153                 const char *username = NULL, *home = NULL, *shell = NULL;
1154                 unsigned n_dont_close = 0;
1155                 int dont_close[n_fds + 3];
1156                 uid_t uid = (uid_t) -1;
1157                 gid_t gid = (gid_t) -1;
1158                 sigset_t ss;
1159                 int i, err;
1160
1161                 /* child */
1162
1163                 rename_process_from_path(command->path);
1164
1165                 /* We reset exactly these signals, since they are the
1166                  * only ones we set to SIG_IGN in the main daemon. All
1167                  * others we leave untouched because we set them to
1168                  * SIG_DFL or a valid handler initially, both of which
1169                  * will be demoted to SIG_DFL. */
1170                 default_signals(SIGNALS_CRASH_HANDLER,
1171                                 SIGNALS_IGNORE, -1);
1172
1173                 if (context->ignore_sigpipe)
1174                         ignore_signals(SIGPIPE, -1);
1175
1176                 assert_se(sigemptyset(&ss) == 0);
1177                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1178                         err = -errno;
1179                         r = EXIT_SIGNAL_MASK;
1180                         goto fail_child;
1181                 }
1182
1183                 if (idle_pipe)
1184                         do_idle_pipe_dance(idle_pipe);
1185
1186                 /* Close sockets very early to make sure we don't
1187                  * block init reexecution because it cannot bind its
1188                  * sockets */
1189                 log_forget_fds();
1190
1191                 if (socket_fd >= 0)
1192                         dont_close[n_dont_close++] = socket_fd;
1193                 if (n_fds > 0) {
1194                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1195                         n_dont_close += n_fds;
1196                 }
1197                 if (runtime) {
1198                         if (runtime->netns_storage_socket[0] >= 0)
1199                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1200                         if (runtime->netns_storage_socket[1] >= 0)
1201                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1202                 }
1203
1204                 err = close_all_fds(dont_close, n_dont_close);
1205                 if (err < 0) {
1206                         r = EXIT_FDS;
1207                         goto fail_child;
1208                 }
1209
1210                 if (!context->same_pgrp)
1211                         if (setsid() < 0) {
1212                                 err = -errno;
1213                                 r = EXIT_SETSID;
1214                                 goto fail_child;
1215                         }
1216
1217                 if (context->tcpwrap_name) {
1218                         if (socket_fd >= 0)
1219                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1220                                         err = -EACCES;
1221                                         r = EXIT_TCPWRAP;
1222                                         goto fail_child;
1223                                 }
1224
1225                         for (i = 0; i < (int) n_fds; i++) {
1226                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1227                                         err = -EACCES;
1228                                         r = EXIT_TCPWRAP;
1229                                         goto fail_child;
1230                                 }
1231                         }
1232                 }
1233
1234                 exec_context_tty_reset(context);
1235
1236                 if (confirm_spawn) {
1237                         char response;
1238
1239                         err = ask_for_confirmation(&response, argv);
1240                         if (err == -ETIMEDOUT)
1241                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1242                         else if (err < 0)
1243                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1244                         else if (response == 's') {
1245                                 write_confirm_message("Skipping execution.\n");
1246                                 err = -ECANCELED;
1247                                 r = EXIT_CONFIRM;
1248                                 goto fail_child;
1249                         } else if (response == 'n') {
1250                                 write_confirm_message("Failing execution.\n");
1251                                 err = r = 0;
1252                                 goto fail_child;
1253                         }
1254                 }
1255
1256                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1257                  * must sure to drop O_NONBLOCK */
1258                 if (socket_fd >= 0)
1259                         fd_nonblock(socket_fd, false);
1260
1261                 err = setup_input(context, socket_fd, apply_tty_stdin);
1262                 if (err < 0) {
1263                         r = EXIT_STDIN;
1264                         goto fail_child;
1265                 }
1266
1267                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1268                 if (err < 0) {
1269                         r = EXIT_STDOUT;
1270                         goto fail_child;
1271                 }
1272
1273                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1274                 if (err < 0) {
1275                         r = EXIT_STDERR;
1276                         goto fail_child;
1277                 }
1278
1279                 if (cgroup_path) {
1280                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1281                         if (err < 0) {
1282                                 r = EXIT_CGROUP;
1283                                 goto fail_child;
1284                         }
1285                 }
1286
1287                 if (context->oom_score_adjust_set) {
1288                         char t[16];
1289
1290                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1291                         char_array_0(t);
1292
1293                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1294                                 err = -errno;
1295                                 r = EXIT_OOM_ADJUST;
1296                                 goto fail_child;
1297                         }
1298                 }
1299
1300                 if (context->nice_set)
1301                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1302                                 err = -errno;
1303                                 r = EXIT_NICE;
1304                                 goto fail_child;
1305                         }
1306
1307                 if (context->cpu_sched_set) {
1308                         struct sched_param param = {
1309                                 .sched_priority = context->cpu_sched_priority,
1310                         };
1311
1312                         r = sched_setscheduler(0,
1313                                                context->cpu_sched_policy |
1314                                                (context->cpu_sched_reset_on_fork ?
1315                                                 SCHED_RESET_ON_FORK : 0),
1316                                                &param);
1317                         if (r < 0) {
1318                                 err = -errno;
1319                                 r = EXIT_SETSCHEDULER;
1320                                 goto fail_child;
1321                         }
1322                 }
1323
1324                 if (context->cpuset)
1325                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1326                                 err = -errno;
1327                                 r = EXIT_CPUAFFINITY;
1328                                 goto fail_child;
1329                         }
1330
1331                 if (context->ioprio_set)
1332                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1333                                 err = -errno;
1334                                 r = EXIT_IOPRIO;
1335                                 goto fail_child;
1336                         }
1337
1338                 if (context->timer_slack_nsec != (nsec_t) -1)
1339                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1340                                 err = -errno;
1341                                 r = EXIT_TIMERSLACK;
1342                                 goto fail_child;
1343                         }
1344
1345                 if (context->utmp_id)
1346                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1347
1348                 if (context->user) {
1349                         username = context->user;
1350                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1351                         if (err < 0) {
1352                                 r = EXIT_USER;
1353                                 goto fail_child;
1354                         }
1355
1356                         if (is_terminal_input(context->std_input)) {
1357                                 err = chown_terminal(STDIN_FILENO, uid);
1358                                 if (err < 0) {
1359                                         r = EXIT_STDIN;
1360                                         goto fail_child;
1361                                 }
1362                         }
1363                 }
1364
1365 #ifdef HAVE_PAM
1366                 if (cgroup_path && context->user && context->pam_name) {
1367                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1368                         if (err < 0) {
1369                                 r = EXIT_CGROUP;
1370                                 goto fail_child;
1371                         }
1372
1373
1374                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1375                         if (err < 0) {
1376                                 r = EXIT_CGROUP;
1377                                 goto fail_child;
1378                         }
1379                 }
1380 #endif
1381
1382                 if (apply_permissions) {
1383                         err = enforce_groups(context, username, gid);
1384                         if (err < 0) {
1385                                 r = EXIT_GROUP;
1386                                 goto fail_child;
1387                         }
1388                 }
1389
1390                 umask(context->umask);
1391
1392 #ifdef HAVE_PAM
1393                 if (apply_permissions && context->pam_name && username) {
1394                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1395                         if (err < 0) {
1396                                 r = EXIT_PAM;
1397                                 goto fail_child;
1398                         }
1399                 }
1400 #endif
1401                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1402                         err = setup_netns(runtime->netns_storage_socket);
1403                         if (err < 0) {
1404                                 r = EXIT_NETWORK;
1405                                 goto fail_child;
1406                         }
1407                 }
1408
1409                 if (!strv_isempty(context->read_write_dirs) ||
1410                     !strv_isempty(context->read_only_dirs) ||
1411                     !strv_isempty(context->inaccessible_dirs) ||
1412                     context->mount_flags != 0 ||
1413                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1414                     context->private_devices) {
1415
1416                         char *tmp = NULL, *var = NULL;
1417
1418                         /* The runtime struct only contains the parent
1419                          * of the private /tmp, which is
1420                          * non-accessible to world users. Inside of it
1421                          * there's a /tmp that is sticky, and that's
1422                          * the one we want to use here. */
1423
1424                         if (context->private_tmp && runtime) {
1425                                 if (runtime->tmp_dir)
1426                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1427                                 if (runtime->var_tmp_dir)
1428                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1429                         }
1430
1431                         err = setup_namespace(
1432                                         context->read_write_dirs,
1433                                         context->read_only_dirs,
1434                                         context->inaccessible_dirs,
1435                                         tmp,
1436                                         var,
1437                                         context->private_devices,
1438                                         context->mount_flags);
1439
1440                         if (err < 0) {
1441                                 r = EXIT_NAMESPACE;
1442                                 goto fail_child;
1443                         }
1444                 }
1445
1446                 if (apply_chroot) {
1447                         if (context->root_directory)
1448                                 if (chroot(context->root_directory) < 0) {
1449                                         err = -errno;
1450                                         r = EXIT_CHROOT;
1451                                         goto fail_child;
1452                                 }
1453
1454                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1455                                 err = -errno;
1456                                 r = EXIT_CHDIR;
1457                                 goto fail_child;
1458                         }
1459                 } else {
1460                         _cleanup_free_ char *d = NULL;
1461
1462                         if (asprintf(&d, "%s/%s",
1463                                      context->root_directory ? context->root_directory : "",
1464                                      context->working_directory ? context->working_directory : "") < 0) {
1465                                 err = -ENOMEM;
1466                                 r = EXIT_MEMORY;
1467                                 goto fail_child;
1468                         }
1469
1470                         if (chdir(d) < 0) {
1471                                 err = -errno;
1472                                 r = EXIT_CHDIR;
1473                                 goto fail_child;
1474                         }
1475                 }
1476
1477                 /* We repeat the fd closing here, to make sure that
1478                  * nothing is leaked from the PAM modules */
1479                 err = close_all_fds(fds, n_fds);
1480                 if (err >= 0)
1481                         err = shift_fds(fds, n_fds);
1482                 if (err >= 0)
1483                         err = flags_fds(fds, n_fds, context->non_blocking);
1484                 if (err < 0) {
1485                         r = EXIT_FDS;
1486                         goto fail_child;
1487                 }
1488
1489                 if (apply_permissions) {
1490
1491                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1492                                 if (!context->rlimit[i])
1493                                         continue;
1494
1495                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1496                                         err = -errno;
1497                                         r = EXIT_LIMITS;
1498                                         goto fail_child;
1499                                 }
1500                         }
1501
1502                         if (context->capability_bounding_set_drop) {
1503                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1504                                 if (err < 0) {
1505                                         r = EXIT_CAPABILITIES;
1506                                         goto fail_child;
1507                                 }
1508                         }
1509
1510                         if (context->user) {
1511                                 err = enforce_user(context, uid);
1512                                 if (err < 0) {
1513                                         r = EXIT_USER;
1514                                         goto fail_child;
1515                                 }
1516                         }
1517
1518                         /* PR_GET_SECUREBITS is not privileged, while
1519                          * PR_SET_SECUREBITS is. So to suppress
1520                          * potential EPERMs we'll try not to call
1521                          * PR_SET_SECUREBITS unless necessary. */
1522                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1523                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1524                                         err = -errno;
1525                                         r = EXIT_SECUREBITS;
1526                                         goto fail_child;
1527                                 }
1528
1529                         if (context->capabilities)
1530                                 if (cap_set_proc(context->capabilities) < 0) {
1531                                         err = -errno;
1532                                         r = EXIT_CAPABILITIES;
1533                                         goto fail_child;
1534                                 }
1535
1536                         if (context->no_new_privileges)
1537                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1538                                         err = -errno;
1539                                         r = EXIT_NO_NEW_PRIVILEGES;
1540                                         goto fail_child;
1541                                 }
1542
1543 #ifdef HAVE_SECCOMP
1544                         if (context->filtered_syscalls) {
1545                                 err = apply_seccomp(context);
1546                                 if (err < 0) {
1547                                         r = EXIT_SECCOMP;
1548                                         goto fail_child;
1549                                 }
1550                         }
1551 #endif
1552 #ifdef HAVE_SELINUX
1553                         if (context->selinux_context && use_selinux()) {
1554                                 bool ignore;
1555                                 char* c;
1556
1557                                 c = context->selinux_context;
1558                                 if (c[0] == '-') {
1559                                         c++;
1560                                         ignore = true;
1561                                 } else
1562                                         ignore = false;
1563
1564                                 err = setexeccon(c);
1565                                 if (err < 0 && !ignore) {
1566                                         r = EXIT_SELINUX_CONTEXT;
1567                                         goto fail_child;
1568                                 }
1569                         }
1570 #endif
1571                 }
1572
1573                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1574                 if (r < 0) {
1575                         r = EXIT_MEMORY;
1576                         goto fail_child;
1577                 }
1578
1579                 final_env = strv_env_merge(5,
1580                                            environment,
1581                                            our_env,
1582                                            context->environment,
1583                                            files_env,
1584                                            pam_env,
1585                                            NULL);
1586                 if (!final_env) {
1587                         err = -ENOMEM;
1588                         r = EXIT_MEMORY;
1589                         goto fail_child;
1590                 }
1591
1592                 final_argv = replace_env_argv(argv, final_env);
1593                 if (!final_argv) {
1594                         err = -ENOMEM;
1595                         r = EXIT_MEMORY;
1596                         goto fail_child;
1597                 }
1598
1599                 final_env = strv_env_clean(final_env);
1600
1601                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1602                         line = exec_command_line(final_argv);
1603                         if (line) {
1604                                 log_open();
1605                                 log_struct_unit(LOG_DEBUG,
1606                                                 unit_id,
1607                                                 "EXECUTABLE=%s", command->path,
1608                                                 "MESSAGE=Executing: %s", line,
1609                                                 NULL);
1610                                 log_close();
1611                                 free(line);
1612                                 line = NULL;
1613                         }
1614                 }
1615                 execve(command->path, final_argv, final_env);
1616                 err = -errno;
1617                 r = EXIT_EXEC;
1618
1619         fail_child:
1620                 if (r != 0) {
1621                         log_open();
1622                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1623                                    "EXECUTABLE=%s", command->path,
1624                                    "MESSAGE=Failed at step %s spawning %s: %s",
1625                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1626                                           command->path, strerror(-err),
1627                                    "ERRNO=%d", -err,
1628                                    NULL);
1629                         log_close();
1630                 }
1631
1632                 _exit(r);
1633         }
1634
1635         log_struct_unit(LOG_DEBUG,
1636                         unit_id,
1637                         "MESSAGE=Forked %s as "PID_FMT,
1638                         command->path, pid,
1639                         NULL);
1640
1641         /* We add the new process to the cgroup both in the child (so
1642          * that we can be sure that no user code is ever executed
1643          * outside of the cgroup) and in the parent (so that we can be
1644          * sure that when we kill the cgroup the process will be
1645          * killed too). */
1646         if (cgroup_path)
1647                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1648
1649         exec_status_start(&command->exec_status, pid);
1650
1651         *ret = pid;
1652         return 0;
1653 }
1654
1655 void exec_context_init(ExecContext *c) {
1656         assert(c);
1657
1658         c->umask = 0022;
1659         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1660         c->cpu_sched_policy = SCHED_OTHER;
1661         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1662         c->syslog_level_prefix = true;
1663         c->ignore_sigpipe = true;
1664         c->timer_slack_nsec = (nsec_t) -1;
1665 }
1666
1667 void exec_context_done(ExecContext *c) {
1668         unsigned l;
1669
1670         assert(c);
1671
1672         strv_free(c->environment);
1673         c->environment = NULL;
1674
1675         strv_free(c->environment_files);
1676         c->environment_files = NULL;
1677
1678         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1679                 free(c->rlimit[l]);
1680                 c->rlimit[l] = NULL;
1681         }
1682
1683         free(c->working_directory);
1684         c->working_directory = NULL;
1685         free(c->root_directory);
1686         c->root_directory = NULL;
1687
1688         free(c->tty_path);
1689         c->tty_path = NULL;
1690
1691         free(c->tcpwrap_name);
1692         c->tcpwrap_name = NULL;
1693
1694         free(c->syslog_identifier);
1695         c->syslog_identifier = NULL;
1696
1697         free(c->user);
1698         c->user = NULL;
1699
1700         free(c->group);
1701         c->group = NULL;
1702
1703         strv_free(c->supplementary_groups);
1704         c->supplementary_groups = NULL;
1705
1706         free(c->pam_name);
1707         c->pam_name = NULL;
1708
1709         if (c->capabilities) {
1710                 cap_free(c->capabilities);
1711                 c->capabilities = NULL;
1712         }
1713
1714         strv_free(c->read_only_dirs);
1715         c->read_only_dirs = NULL;
1716
1717         strv_free(c->read_write_dirs);
1718         c->read_write_dirs = NULL;
1719
1720         strv_free(c->inaccessible_dirs);
1721         c->inaccessible_dirs = NULL;
1722
1723         if (c->cpuset)
1724                 CPU_FREE(c->cpuset);
1725
1726         free(c->utmp_id);
1727         c->utmp_id = NULL;
1728
1729         free(c->selinux_context);
1730         c->selinux_context = NULL;
1731
1732         free(c->syscall_filter);
1733         c->syscall_filter = NULL;
1734
1735         free(c->syscall_filter_string);
1736         c->syscall_filter_string = NULL;
1737
1738 #ifdef HAVE_SECCOMP
1739         if (c->syscall_filter) {
1740                 seccomp_release(c->syscall_filter);
1741                 c->syscall_filter = NULL;
1742         }
1743         set_free(c->filtered_syscalls);
1744         c->filtered_syscalls = NULL;
1745 #endif
1746 }
1747
1748 void exec_command_done(ExecCommand *c) {
1749         assert(c);
1750
1751         free(c->path);
1752         c->path = NULL;
1753
1754         strv_free(c->argv);
1755         c->argv = NULL;
1756 }
1757
1758 void exec_command_done_array(ExecCommand *c, unsigned n) {
1759         unsigned i;
1760
1761         for (i = 0; i < n; i++)
1762                 exec_command_done(c+i);
1763 }
1764
1765 void exec_command_free_list(ExecCommand *c) {
1766         ExecCommand *i;
1767
1768         while ((i = c)) {
1769                 LIST_REMOVE(command, c, i);
1770                 exec_command_done(i);
1771                 free(i);
1772         }
1773 }
1774
1775 void exec_command_free_array(ExecCommand **c, unsigned n) {
1776         unsigned i;
1777
1778         for (i = 0; i < n; i++) {
1779                 exec_command_free_list(c[i]);
1780                 c[i] = NULL;
1781         }
1782 }
1783
1784 int exec_context_load_environment(const ExecContext *c, char ***l) {
1785         char **i, **r = NULL;
1786
1787         assert(c);
1788         assert(l);
1789
1790         STRV_FOREACH(i, c->environment_files) {
1791                 char *fn;
1792                 int k;
1793                 bool ignore = false;
1794                 char **p;
1795                 _cleanup_globfree_ glob_t pglob = {};
1796                 int count, n;
1797
1798                 fn = *i;
1799
1800                 if (fn[0] == '-') {
1801                         ignore = true;
1802                         fn ++;
1803                 }
1804
1805                 if (!path_is_absolute(fn)) {
1806                         if (ignore)
1807                                 continue;
1808
1809                         strv_free(r);
1810                         return -EINVAL;
1811                 }
1812
1813                 /* Filename supports globbing, take all matching files */
1814                 errno = 0;
1815                 if (glob(fn, 0, NULL, &pglob) != 0) {
1816                         if (ignore)
1817                                 continue;
1818
1819                         strv_free(r);
1820                         return errno ? -errno : -EINVAL;
1821                 }
1822                 count = pglob.gl_pathc;
1823                 if (count == 0) {
1824                         if (ignore)
1825                                 continue;
1826
1827                         strv_free(r);
1828                         return -EINVAL;
1829                 }
1830                 for (n = 0; n < count; n++) {
1831                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1832                         if (k < 0) {
1833                                 if (ignore)
1834                                         continue;
1835
1836                                 strv_free(r);
1837                                 return k;
1838                         }
1839                         /* Log invalid environment variables with filename */
1840                         if (p)
1841                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1842
1843                         if (r == NULL)
1844                                 r = p;
1845                         else {
1846                                 char **m;
1847
1848                                 m = strv_env_merge(2, r, p);
1849                                 strv_free(r);
1850                                 strv_free(p);
1851                                 if (!m)
1852                                         return -ENOMEM;
1853
1854                                 r = m;
1855                         }
1856                 }
1857         }
1858
1859         *l = r;
1860
1861         return 0;
1862 }
1863
/* Checks whether the given tty name (with or without a leading "/dev/")
 * might refer to the same device as /dev/console. Errs on the side of
 * "yes" whenever the console cannot be resolved. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        if (startswith(tty, "/dev/"))
                tty += 5;

        /* trivial identity? */
        if (streq(tty, "console"))
                return true;

        console = resolve_dev_console(&active);

        /* if we could not resolve, assume it may */
        if (!console)
                return true;

        match = streq(console, tty);

        /* "tty0" means the active VC, so it may be the same sometimes */
        if (!match && streq(console, "tty0"))
                match = tty_is_vc(tty);

        free(active);

        return match;
}
1886
1887 bool exec_context_may_touch_console(ExecContext *ec) {
1888         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1889                 is_terminal_input(ec->std_input) ||
1890                 is_terminal_output(ec->std_output) ||
1891                 is_terminal_output(ec->std_error)) &&
1892                tty_may_match_dev_console(tty_path(ec));
1893 }
1894
1895 static void strv_fprintf(FILE *f, char **l) {
1896         char **g;
1897
1898         assert(f);
1899
1900         STRV_FOREACH(g, l)
1901                 fprintf(f, " %s", *g);
1902 }
1903
1904 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1905         char **e;
1906         unsigned i;
1907
1908         assert(c);
1909         assert(f);
1910
1911         prefix = strempty(prefix);
1912
1913         fprintf(f,
1914                 "%sUMask: %04o\n"
1915                 "%sWorkingDirectory: %s\n"
1916                 "%sRootDirectory: %s\n"
1917                 "%sNonBlocking: %s\n"
1918                 "%sPrivateTmp: %s\n"
1919                 "%sPrivateNetwork: %s\n"
1920                 "%sPrivateDevices: %s\n"
1921                 "%sIgnoreSIGPIPE: %s\n",
1922                 prefix, c->umask,
1923                 prefix, c->working_directory ? c->working_directory : "/",
1924                 prefix, c->root_directory ? c->root_directory : "/",
1925                 prefix, yes_no(c->non_blocking),
1926                 prefix, yes_no(c->private_tmp),
1927                 prefix, yes_no(c->private_network),
1928                 prefix, yes_no(c->private_devices),
1929                 prefix, yes_no(c->ignore_sigpipe));
1930
1931         STRV_FOREACH(e, c->environment)
1932                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1933
1934         STRV_FOREACH(e, c->environment_files)
1935                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1936
1937         if (c->tcpwrap_name)
1938                 fprintf(f,
1939                         "%sTCPWrapName: %s\n",
1940                         prefix, c->tcpwrap_name);
1941
1942         if (c->nice_set)
1943                 fprintf(f,
1944                         "%sNice: %i\n",
1945                         prefix, c->nice);
1946
1947         if (c->oom_score_adjust_set)
1948                 fprintf(f,
1949                         "%sOOMScoreAdjust: %i\n",
1950                         prefix, c->oom_score_adjust);
1951
1952         for (i = 0; i < RLIM_NLIMITS; i++)
1953                 if (c->rlimit[i])
1954                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1955
1956         if (c->ioprio_set) {
1957                 char *class_str;
1958                 int r;
1959
1960                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1961                 if (r < 0)
1962                         class_str = NULL;
1963                 fprintf(f,
1964                         "%sIOSchedulingClass: %s\n"
1965                         "%sIOPriority: %i\n",
1966                         prefix, strna(class_str),
1967                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1968                 free(class_str);
1969         }
1970
1971         if (c->cpu_sched_set) {
1972                 char *policy_str;
1973                 int r;
1974
1975                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1976                 if (r < 0)
1977                         policy_str = NULL;
1978                 fprintf(f,
1979                         "%sCPUSchedulingPolicy: %s\n"
1980                         "%sCPUSchedulingPriority: %i\n"
1981                         "%sCPUSchedulingResetOnFork: %s\n",
1982                         prefix, strna(policy_str),
1983                         prefix, c->cpu_sched_priority,
1984                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1985                 free(policy_str);
1986         }
1987
1988         if (c->cpuset) {
1989                 fprintf(f, "%sCPUAffinity:", prefix);
1990                 for (i = 0; i < c->cpuset_ncpus; i++)
1991                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1992                                 fprintf(f, " %u", i);
1993                 fputs("\n", f);
1994         }
1995
1996         if (c->timer_slack_nsec != (nsec_t) -1)
1997                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
1998
1999         fprintf(f,
2000                 "%sStandardInput: %s\n"
2001                 "%sStandardOutput: %s\n"
2002                 "%sStandardError: %s\n",
2003                 prefix, exec_input_to_string(c->std_input),
2004                 prefix, exec_output_to_string(c->std_output),
2005                 prefix, exec_output_to_string(c->std_error));
2006
2007         if (c->tty_path)
2008                 fprintf(f,
2009                         "%sTTYPath: %s\n"
2010                         "%sTTYReset: %s\n"
2011                         "%sTTYVHangup: %s\n"
2012                         "%sTTYVTDisallocate: %s\n",
2013                         prefix, c->tty_path,
2014                         prefix, yes_no(c->tty_reset),
2015                         prefix, yes_no(c->tty_vhangup),
2016                         prefix, yes_no(c->tty_vt_disallocate));
2017
2018         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2019             c->std_output == EXEC_OUTPUT_KMSG ||
2020             c->std_output == EXEC_OUTPUT_JOURNAL ||
2021             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2022             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2023             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2024             c->std_error == EXEC_OUTPUT_SYSLOG ||
2025             c->std_error == EXEC_OUTPUT_KMSG ||
2026             c->std_error == EXEC_OUTPUT_JOURNAL ||
2027             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2028             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2029             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2030
2031                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2032
2033                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2034                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2035
2036                 fprintf(f,
2037                         "%sSyslogFacility: %s\n"
2038                         "%sSyslogLevel: %s\n",
2039                         prefix, strna(fac_str),
2040                         prefix, strna(lvl_str));
2041         }
2042
2043         if (c->capabilities) {
2044                 _cleanup_cap_free_charp_ char *t;
2045
2046                 t = cap_to_text(c->capabilities, NULL);
2047                 if (t)
2048                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2049         }
2050
2051         if (c->secure_bits)
2052                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2053                         prefix,
2054                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2055                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2056                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2057                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2058                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2059                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2060
2061         if (c->capability_bounding_set_drop) {
2062                 unsigned long l;
2063                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2064
2065                 for (l = 0; l <= cap_last_cap(); l++)
2066                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2067                                 _cleanup_cap_free_charp_ char *t;
2068
2069                                 t = cap_to_name(l);
2070                                 if (t)
2071                                         fprintf(f, " %s", t);
2072                         }
2073
2074                 fputs("\n", f);
2075         }
2076
2077         if (c->user)
2078                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2079         if (c->group)
2080                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2081
2082         if (strv_length(c->supplementary_groups) > 0) {
2083                 fprintf(f, "%sSupplementaryGroups:", prefix);
2084                 strv_fprintf(f, c->supplementary_groups);
2085                 fputs("\n", f);
2086         }
2087
2088         if (c->pam_name)
2089                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2090
2091         if (strv_length(c->read_write_dirs) > 0) {
2092                 fprintf(f, "%sReadWriteDirs:", prefix);
2093                 strv_fprintf(f, c->read_write_dirs);
2094                 fputs("\n", f);
2095         }
2096
2097         if (strv_length(c->read_only_dirs) > 0) {
2098                 fprintf(f, "%sReadOnlyDirs:", prefix);
2099                 strv_fprintf(f, c->read_only_dirs);
2100                 fputs("\n", f);
2101         }
2102
2103         if (strv_length(c->inaccessible_dirs) > 0) {
2104                 fprintf(f, "%sInaccessibleDirs:", prefix);
2105                 strv_fprintf(f, c->inaccessible_dirs);
2106                 fputs("\n", f);
2107         }
2108
2109         if (c->utmp_id)
2110                 fprintf(f,
2111                         "%sUtmpIdentifier: %s\n",
2112                         prefix, c->utmp_id);
2113
2114         if (c->selinux_context)
2115                 fprintf(f,
2116                         "%sSELinuxContext: %s\n",
2117                         prefix, c->selinux_context);
2118 }
2119
2120 void exec_status_start(ExecStatus *s, pid_t pid) {
2121         assert(s);
2122
2123         zero(*s);
2124         s->pid = pid;
2125         dual_timestamp_get(&s->start_timestamp);
2126 }
2127
2128 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2129         assert(s);
2130
2131         if (s->pid && s->pid != pid)
2132                 zero(*s);
2133
2134         s->pid = pid;
2135         dual_timestamp_get(&s->exit_timestamp);
2136
2137         s->code = code;
2138         s->status = status;
2139
2140         if (context) {
2141                 if (context->utmp_id)
2142                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2143
2144                 exec_context_tty_reset(context);
2145         }
2146 }
2147
2148 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2149         char buf[FORMAT_TIMESTAMP_MAX];
2150
2151         assert(s);
2152         assert(f);
2153
2154         if (!prefix)
2155                 prefix = "";
2156
2157         if (s->pid <= 0)
2158                 return;
2159
2160         fprintf(f,
2161                 "%sPID: "PID_FMT"\n",
2162                 prefix, s->pid);
2163
2164         if (s->start_timestamp.realtime > 0)
2165                 fprintf(f,
2166                         "%sStart Timestamp: %s\n",
2167                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2168
2169         if (s->exit_timestamp.realtime > 0)
2170                 fprintf(f,
2171                         "%sExit Timestamp: %s\n"
2172                         "%sExit Code: %s\n"
2173                         "%sExit Status: %i\n",
2174                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2175                         prefix, sigchld_code_to_string(s->code),
2176                         prefix, s->status);
2177 }
2178
2179 char *exec_command_line(char **argv) {
2180         size_t k;
2181         char *n, *p, **a;
2182         bool first = true;
2183
2184         assert(argv);
2185
2186         k = 1;
2187         STRV_FOREACH(a, argv)
2188                 k += strlen(*a)+3;
2189
2190         if (!(n = new(char, k)))
2191                 return NULL;
2192
2193         p = n;
2194         STRV_FOREACH(a, argv) {
2195
2196                 if (!first)
2197                         *(p++) = ' ';
2198                 else
2199                         first = false;
2200
2201                 if (strpbrk(*a, WHITESPACE)) {
2202                         *(p++) = '\'';
2203                         p = stpcpy(p, *a);
2204                         *(p++) = '\'';
2205                 } else
2206                         p = stpcpy(p, *a);
2207
2208         }
2209
2210         *p = 0;
2211
2212         /* FIXME: this doesn't really handle arguments that have
2213          * spaces and ticks in them */
2214
2215         return n;
2216 }
2217
2218 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2219         char *p2;
2220         const char *prefix2;
2221
2222         char *cmd;
2223
2224         assert(c);
2225         assert(f);
2226
2227         if (!prefix)
2228                 prefix = "";
2229         p2 = strappend(prefix, "\t");
2230         prefix2 = p2 ? p2 : prefix;
2231
2232         cmd = exec_command_line(c->argv);
2233
2234         fprintf(f,
2235                 "%sCommand Line: %s\n",
2236                 prefix, cmd ? cmd : strerror(ENOMEM));
2237
2238         free(cmd);
2239
2240         exec_status_dump(&c->exec_status, f, prefix2);
2241
2242         free(p2);
2243 }
2244
2245 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2246         assert(f);
2247
2248         if (!prefix)
2249                 prefix = "";
2250
2251         LIST_FOREACH(command, c, c)
2252                 exec_command_dump(c, f, prefix);
2253 }
2254
2255 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2256         ExecCommand *end;
2257
2258         assert(l);
2259         assert(e);
2260
2261         if (*l) {
2262                 /* It's kind of important, that we keep the order here */
2263                 LIST_FIND_TAIL(command, *l, end);
2264                 LIST_INSERT_AFTER(command, *l, end, e);
2265         } else
2266               *l = e;
2267 }
2268
2269 int exec_command_set(ExecCommand *c, const char *path, ...) {
2270         va_list ap;
2271         char **l, *p;
2272
2273         assert(c);
2274         assert(path);
2275
2276         va_start(ap, path);
2277         l = strv_new_ap(path, ap);
2278         va_end(ap);
2279
2280         if (!l)
2281                 return -ENOMEM;
2282
2283         p = strdup(path);
2284         if (!p) {
2285                 strv_free(l);
2286                 return -ENOMEM;
2287         }
2288
2289         free(c->path);
2290         c->path = p;
2291
2292         strv_free(c->argv);
2293         c->argv = l;
2294
2295         return 0;
2296 }
2297
2298 static int exec_runtime_allocate(ExecRuntime **rt) {
2299
2300         if (*rt)
2301                 return 0;
2302
2303         *rt = new0(ExecRuntime, 1);
2304         if (!*rt)
2305                 return -ENOMEM;
2306
2307         (*rt)->n_ref = 1;
2308         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2309
2310         return 0;
2311 }
2312
2313 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2314         int r;
2315
2316         assert(rt);
2317         assert(c);
2318         assert(id);
2319
2320         if (*rt)
2321                 return 1;
2322
2323         if (!c->private_network && !c->private_tmp)
2324                 return 0;
2325
2326         r = exec_runtime_allocate(rt);
2327         if (r < 0)
2328                 return r;
2329
2330         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2331                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2332                         return -errno;
2333         }
2334
2335         if (c->private_tmp && !(*rt)->tmp_dir) {
2336                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2337                 if (r < 0)
2338                         return r;
2339         }
2340
2341         return 1;
2342 }
2343
2344 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2345         assert(r);
2346         assert(r->n_ref > 0);
2347
2348         r->n_ref++;
2349         return r;
2350 }
2351
2352 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2353
2354         if (!r)
2355                 return NULL;
2356
2357         assert(r->n_ref > 0);
2358
2359         r->n_ref--;
2360         if (r->n_ref <= 0) {
2361                 free(r->tmp_dir);
2362                 free(r->var_tmp_dir);
2363                 close_pipe(r->netns_storage_socket);
2364                 free(r);
2365         }
2366
2367         return NULL;
2368 }
2369
2370 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2371         assert(u);
2372         assert(f);
2373         assert(fds);
2374
2375         if (!rt)
2376                 return 0;
2377
2378         if (rt->tmp_dir)
2379                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2380
2381         if (rt->var_tmp_dir)
2382                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2383
2384         if (rt->netns_storage_socket[0] >= 0) {
2385                 int copy;
2386
2387                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2388                 if (copy < 0)
2389                         return copy;
2390
2391                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2392         }
2393
2394         if (rt->netns_storage_socket[1] >= 0) {
2395                 int copy;
2396
2397                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2398                 if (copy < 0)
2399                         return copy;
2400
2401                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2402         }
2403
2404         return 0;
2405 }
2406
2407 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2408         int r;
2409
2410         assert(rt);
2411         assert(key);
2412         assert(value);
2413
2414         if (streq(key, "tmp-dir")) {
2415                 char *copy;
2416
2417                 r = exec_runtime_allocate(rt);
2418                 if (r < 0)
2419                         return r;
2420
2421                 copy = strdup(value);
2422                 if (!copy)
2423                         return log_oom();
2424
2425                 free((*rt)->tmp_dir);
2426                 (*rt)->tmp_dir = copy;
2427
2428         } else if (streq(key, "var-tmp-dir")) {
2429                 char *copy;
2430
2431                 r = exec_runtime_allocate(rt);
2432                 if (r < 0)
2433                         return r;
2434
2435                 copy = strdup(value);
2436                 if (!copy)
2437                         return log_oom();
2438
2439                 free((*rt)->var_tmp_dir);
2440                 (*rt)->var_tmp_dir = copy;
2441
2442         } else if (streq(key, "netns-socket-0")) {
2443                 int fd;
2444
2445                 r = exec_runtime_allocate(rt);
2446                 if (r < 0)
2447                         return r;
2448
2449                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2450                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2451                 else {
2452                         if ((*rt)->netns_storage_socket[0] >= 0)
2453                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2454
2455                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2456                 }
2457         } else if (streq(key, "netns-socket-1")) {
2458                 int fd;
2459
2460                 r = exec_runtime_allocate(rt);
2461                 if (r < 0)
2462                         return r;
2463
2464                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2465                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2466                 else {
2467                         if ((*rt)->netns_storage_socket[1] >= 0)
2468                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2469
2470                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2471                 }
2472         } else
2473                 return 0;
2474
2475         return 1;
2476 }
2477
/* Job function handed to asynchronous_job(): removes the directory tree
 * named by p via rm_rf_dangerous() and then frees the path string, which
 * this function takes ownership of. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2484
2485 void exec_runtime_destroy(ExecRuntime *rt) {
2486         if (!rt)
2487                 return;
2488
2489         /* If there are multiple users of this, let's leave the stuff around */
2490         if (rt->n_ref > 1)
2491                 return;
2492
2493         if (rt->tmp_dir) {
2494                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2495                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2496                 rt->tmp_dir = NULL;
2497         }
2498
2499         if (rt->var_tmp_dir) {
2500                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2501                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2502                 rt->var_tmp_dir = NULL;
2503         }
2504
2505         close_pipe(rt->netns_storage_socket);
2506 }
2507
2508 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2509         [EXEC_INPUT_NULL] = "null",
2510         [EXEC_INPUT_TTY] = "tty",
2511         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2512         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2513         [EXEC_INPUT_SOCKET] = "socket"
2514 };
2515
2516 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2517
2518 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2519         [EXEC_OUTPUT_INHERIT] = "inherit",
2520         [EXEC_OUTPUT_NULL] = "null",
2521         [EXEC_OUTPUT_TTY] = "tty",
2522         [EXEC_OUTPUT_SYSLOG] = "syslog",
2523         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2524         [EXEC_OUTPUT_KMSG] = "kmsg",
2525         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2526         [EXEC_OUTPUT_JOURNAL] = "journal",
2527         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2528         [EXEC_OUTPUT_SOCKET] = "socket"
2529 };
2530
2531 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);