chiark / gitweb /
474a4af895e61b912bc58a7d0d52f7dd3bc800c2
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #include "execute.h"
55 #include "strv.h"
56 #include "macro.h"
57 #include "capability.h"
58 #include "util.h"
59 #include "log.h"
60 #include "sd-messages.h"
61 #include "ioprio.h"
62 #include "securebits.h"
63 #include "namespace.h"
64 #include "tcpwrap.h"
65 #include "exit-status.h"
66 #include "missing.h"
67 #include "utmp-wtmp.h"
68 #include "def.h"
69 #include "path-util.h"
70 #include "syscall-list.h"
71 #include "env-util.h"
72 #include "fileio.h"
73 #include "unit.h"
74 #include "async.h"
75
/* First wait in do_idle_pipe_dance(): how long we poll the idle pipe for a
 * hang-up before writing our "bored" byte */
#define IDLE_TIMEOUT_USEC (5 * USEC_PER_SEC)
/* Second wait in do_idle_pipe_dance(): how long we poll again after that byte */
#define IDLE_TIMEOUT2_USEC (1 * USEC_PER_SEC)

/* Access mode chown_terminal() puts on the terminal.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the logger socket in connect_logger_as() */
#define SNDBUF_SIZE (8 * 1024 * 1024)
83
/* Relocates the n_fds descriptors listed in fds[] so that entry i ends up as
 * file descriptor number i+3. The array is updated in place with the new
 * descriptor numbers. Returns 0 on success, or a negative errno value if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        for (;;) {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int got;

                        /* Nothing to do if this one already sits where it belongs */
                        if (fds[idx] == want)
                                continue;

                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* F_DUPFD was asked for a descriptor >= want; if we
                         * got something else the wanted number is still in
                         * use, so remember the first such index and make
                         * another pass starting there */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
127
/* Applies the requested O_NONBLOCK state to each of the n_fds descriptors in
 * fds[] and clears FD_CLOEXEC on all of them. Returns 0 on success, or the
 * first negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally, since after all
                 * we want to pass these fds on to our children */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
154
155 _pure_ static const char *tty_path(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_path)
159                 return context->tty_path;
160
161         return "/dev/console";
162 }
163
164 static void exec_context_tty_reset(const ExecContext *context) {
165         assert(context);
166
167         if (context->tty_vhangup)
168                 terminal_vhangup(tty_path(context));
169
170         if (context->tty_reset)
171                 reset_terminal(tty_path(context));
172
173         if (context->tty_vt_disallocate && context->tty_path)
174                 vt_disallocate(context->tty_path);
175 }
176
177 static bool is_terminal_output(ExecOutput o) {
178         return
179                 o == EXEC_OUTPUT_TTY ||
180                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
181                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
182                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
183 }
184
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as descriptor number nfd. Returns nfd on success, a negative
 * errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky: the kernel already gave us the number we wanted */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return result;
}
202
203 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
204         int fd, r;
205         union sockaddr_union sa = {
206                 .un.sun_family = AF_UNIX,
207                 .un.sun_path = "/run/systemd/journal/stdout",
208         };
209
210         assert(context);
211         assert(output < _EXEC_OUTPUT_MAX);
212         assert(ident);
213         assert(nfd >= 0);
214
215         fd = socket(AF_UNIX, SOCK_STREAM, 0);
216         if (fd < 0)
217                 return -errno;
218
219         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
220         if (r < 0) {
221                 close_nointr_nofail(fd);
222                 return -errno;
223         }
224
225         if (shutdown(fd, SHUT_RD) < 0) {
226                 close_nointr_nofail(fd);
227                 return -errno;
228         }
229
230         fd_inc_sndbuf(fd, SNDBUF_SIZE);
231
232         dprintf(fd,
233                 "%s\n"
234                 "%s\n"
235                 "%i\n"
236                 "%i\n"
237                 "%i\n"
238                 "%i\n"
239                 "%i\n",
240                 context->syslog_identifier ? context->syslog_identifier : ident,
241                 unit_id,
242                 context->syslog_priority,
243                 !!context->syslog_level_prefix,
244                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
245                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
246                 is_terminal_output(output));
247
248         if (fd != nfd) {
249                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
250                 close_nointr_nofail(fd);
251         } else
252                 r = nfd;
253
254         return r;
255 }
/* Opens the terminal at path via open_terminal() (adding O_NOCTTY to mode)
 * and installs it as descriptor nfd. Returns nfd on success, or a negative
 * error value. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        result = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(term_fd);

        return result;
}
273
274 static bool is_terminal_input(ExecInput i) {
275         return
276                 i == EXEC_INPUT_TTY ||
277                 i == EXEC_INPUT_TTY_FORCE ||
278                 i == EXEC_INPUT_TTY_FAIL;
279 }
280
281 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
282
283         if (is_terminal_input(std_input) && !apply_tty_stdin)
284                 return EXEC_INPUT_NULL;
285
286         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
287                 return EXEC_INPUT_NULL;
288
289         return std_input;
290 }
291
292 static int fixup_output(ExecOutput std_output, int socket_fd) {
293
294         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
295                 return EXEC_OUTPUT_INHERIT;
296
297         return std_output;
298 }
299
300 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
301         ExecInput i;
302
303         assert(context);
304
305         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
306
307         switch (i) {
308
309         case EXEC_INPUT_NULL:
310                 return open_null_as(O_RDONLY, STDIN_FILENO);
311
312         case EXEC_INPUT_TTY:
313         case EXEC_INPUT_TTY_FORCE:
314         case EXEC_INPUT_TTY_FAIL: {
315                 int fd, r;
316
317                 fd = acquire_terminal(tty_path(context),
318                                       i == EXEC_INPUT_TTY_FAIL,
319                                       i == EXEC_INPUT_TTY_FORCE,
320                                       false,
321                                       (usec_t) -1);
322                 if (fd < 0)
323                         return fd;
324
325                 if (fd != STDIN_FILENO) {
326                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
327                         close_nointr_nofail(fd);
328                 } else
329                         r = STDIN_FILENO;
330
331                 return r;
332         }
333
334         case EXEC_INPUT_SOCKET:
335                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
336
337         default:
338                 assert_not_reached("Unknown input type");
339         }
340 }
341
342 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
343         ExecOutput o;
344         ExecInput i;
345         int r;
346
347         assert(context);
348         assert(ident);
349
350         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
351         o = fixup_output(context->std_output, socket_fd);
352
353         if (fileno == STDERR_FILENO) {
354                 ExecOutput e;
355                 e = fixup_output(context->std_error, socket_fd);
356
357                 /* This expects the input and output are already set up */
358
359                 /* Don't change the stderr file descriptor if we inherit all
360                  * the way and are not on a tty */
361                 if (e == EXEC_OUTPUT_INHERIT &&
362                     o == EXEC_OUTPUT_INHERIT &&
363                     i == EXEC_INPUT_NULL &&
364                     !is_terminal_input(context->std_input) &&
365                     getppid () != 1)
366                         return fileno;
367
368                 /* Duplicate from stdout if possible */
369                 if (e == o || e == EXEC_OUTPUT_INHERIT)
370                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
371
372                 o = e;
373
374         } else if (o == EXEC_OUTPUT_INHERIT) {
375                 /* If input got downgraded, inherit the original value */
376                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
377                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
378
379                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
380                 if (i != EXEC_INPUT_NULL)
381                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
382
383                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
384                 if (getppid() != 1)
385                         return fileno;
386
387                 /* We need to open /dev/null here anew, to get the right access mode. */
388                 return open_null_as(O_WRONLY, fileno);
389         }
390
391         switch (o) {
392
393         case EXEC_OUTPUT_NULL:
394                 return open_null_as(O_WRONLY, fileno);
395
396         case EXEC_OUTPUT_TTY:
397                 if (is_terminal_input(i))
398                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
399
400                 /* We don't reset the terminal if this is just about output */
401                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
402
403         case EXEC_OUTPUT_SYSLOG:
404         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
405         case EXEC_OUTPUT_KMSG:
406         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
407         case EXEC_OUTPUT_JOURNAL:
408         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
409                 r = connect_logger_as(context, o, ident, unit_id, fileno);
410                 if (r < 0) {
411                         log_struct_unit(LOG_CRIT, unit_id,
412                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
413                                 fileno == STDOUT_FILENO ? "out" : "err",
414                                 unit_id, strerror(-r),
415                                 "ERRNO=%d", -r,
416                                 NULL);
417                         r = open_null_as(O_WRONLY, fileno);
418                 }
419                 return r;
420
421         case EXEC_OUTPUT_SOCKET:
422                 assert(socket_fd >= 0);
423                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
424
425         default:
426                 assert_not_reached("Unknown error type");
427         }
428 }
429
430 static int chown_terminal(int fd, uid_t uid) {
431         struct stat st;
432
433         assert(fd >= 0);
434
435         /* This might fail. What matters are the results. */
436         (void) fchown(fd, uid, -1);
437         (void) fchmod(fd, TTY_MODE);
438
439         if (fstat(fd, &st) < 0)
440                 return -errno;
441
442         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
443                 return -EPERM;
444
445         return 0;
446 }
447
448 static int setup_confirm_stdio(int *_saved_stdin,
449                                int *_saved_stdout) {
450         int fd = -1, saved_stdin, saved_stdout = -1, r;
451
452         assert(_saved_stdin);
453         assert(_saved_stdout);
454
455         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
456         if (saved_stdin < 0)
457                 return -errno;
458
459         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
460         if (saved_stdout < 0) {
461                 r = errno;
462                 goto fail;
463         }
464
465         fd = acquire_terminal(
466                         "/dev/console",
467                         false,
468                         false,
469                         false,
470                         DEFAULT_CONFIRM_USEC);
471         if (fd < 0) {
472                 r = fd;
473                 goto fail;
474         }
475
476         r = chown_terminal(fd, getuid());
477         if (r < 0)
478                 goto fail;
479
480         if (dup2(fd, STDIN_FILENO) < 0) {
481                 r = -errno;
482                 goto fail;
483         }
484
485         if (dup2(fd, STDOUT_FILENO) < 0) {
486                 r = -errno;
487                 goto fail;
488         }
489
490         if (fd >= 2)
491                 close_nointr_nofail(fd);
492
493         *_saved_stdin = saved_stdin;
494         *_saved_stdout = saved_stdout;
495
496         return 0;
497
498 fail:
499         if (saved_stdout >= 0)
500                 close_nointr_nofail(saved_stdout);
501
502         if (saved_stdin >= 0)
503                 close_nointr_nofail(saved_stdin);
504
505         if (fd >= 0)
506                 close_nointr_nofail(fd);
507
508         return r;
509 }
510
511 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
512         int fd;
513         va_list ap;
514
515         assert(format);
516
517         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
518         if (fd < 0)
519                 return fd;
520
521         va_start(ap, format);
522         vdprintf(fd, format, ap);
523         va_end(ap);
524
525         close_nointr_nofail(fd);
526
527         return 0;
528 }
529
/* Releases the terminal, puts the saved descriptors back onto stdin and
 * stdout and closes the saved copies. Negative saved descriptors are
 * skipped. Returns 0, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        /* Restore both streams first ... */
        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... and only then drop the saved copies */
        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
556
/* Asks on the console whether the command line in argv shall be executed.
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored afterwards, on every path. The character chosen by
 * the user (one of "yns") is stored in *response by ask(). Returns the
 * result of ask(), the negative error from setup_confirm_stdio(), or
 * -ENOMEM if the command line could not be formatted. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (line) {
                r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
                free(line);
        } else
                /* Don't bail out right here: stdin/stdout still point at
                 * the console and the saved descriptors must be released */
                r = -ENOMEM;

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
576
577 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
578         bool keep_groups = false;
579         int r;
580
581         assert(context);
582
583         /* Lookup and set GID and supplementary group list. Here too
584          * we avoid NSS lookups for gid=0. */
585
586         if (context->group || username) {
587
588                 if (context->group) {
589                         const char *g = context->group;
590
591                         if ((r = get_group_creds(&g, &gid)) < 0)
592                                 return r;
593                 }
594
595                 /* First step, initialize groups from /etc/groups */
596                 if (username && gid != 0) {
597                         if (initgroups(username, gid) < 0)
598                                 return -errno;
599
600                         keep_groups = true;
601                 }
602
603                 /* Second step, set our gids */
604                 if (setresgid(gid, gid, gid) < 0)
605                         return -errno;
606         }
607
608         if (context->supplementary_groups) {
609                 int ngroups_max, k;
610                 gid_t *gids;
611                 char **i;
612
613                 /* Final step, initialize any manually set supplementary groups */
614                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
615
616                 if (!(gids = new(gid_t, ngroups_max)))
617                         return -ENOMEM;
618
619                 if (keep_groups) {
620                         if ((k = getgroups(ngroups_max, gids)) < 0) {
621                                 free(gids);
622                                 return -errno;
623                         }
624                 } else
625                         k = 0;
626
627                 STRV_FOREACH(i, context->supplementary_groups) {
628                         const char *g;
629
630                         if (k >= ngroups_max) {
631                                 free(gids);
632                                 return -E2BIG;
633                         }
634
635                         g = *i;
636                         r = get_group_creds(&g, gids+k);
637                         if (r < 0) {
638                                 free(gids);
639                                 return r;
640                         }
641
642                         k++;
643                 }
644
645                 if (setgroups(k, gids) < 0) {
646                         free(gids);
647                         return -errno;
648                 }
649
650                 free(gids);
651         }
652
653         return 0;
654 }
655
656 static int enforce_user(const ExecContext *context, uid_t uid) {
657         assert(context);
658
659         /* Sets (but doesn't lookup) the uid and make sure we keep the
660          * capabilities while doing so. */
661
662         if (context->capabilities) {
663                 _cleanup_cap_free_ cap_t d = NULL;
664                 static const cap_value_t bits[] = {
665                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
666                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
667                 };
668
669                 /* First step: If we need to keep capabilities but
670                  * drop privileges we need to make sure we keep our
671                  * caps, while we drop privileges. */
672                 if (uid != 0) {
673                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
674
675                         if (prctl(PR_GET_SECUREBITS) != sb)
676                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
677                                         return -errno;
678                 }
679
680                 /* Second step: set the capabilities. This will reduce
681                  * the capabilities to the minimum we need. */
682
683                 d = cap_dup(context->capabilities);
684                 if (!d)
685                         return -errno;
686
687                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
688                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
689                         return -errno;
690
691                 if (cap_set_proc(d) < 0)
692                         return -errno;
693         }
694
695         /* Third step: actually set the uids */
696         if (setresuid(uid, uid, uid) < 0)
697                 return -errno;
698
699         /* At this point we should have all necessary capabilities but
700            are otherwise a normal user. However, the caps might got
701            corrupted due to the setresuid() so we need clean them up
702            later. This is done outside of this call. */
703
704         return 0;
705 }
706
707 #ifdef HAVE_PAM
708
709 static int null_conv(
710                 int num_msg,
711                 const struct pam_message **msg,
712                 struct pam_response **resp,
713                 void *appdata_ptr) {
714
715         /* We don't support conversations */
716
717         return PAM_CONV_ERR;
718 }
719
720 static int setup_pam(
721                 const char *name,
722                 const char *user,
723                 uid_t uid,
724                 const char *tty,
725                 char ***pam_env,
726                 int fds[], unsigned n_fds) {
727
728         static const struct pam_conv conv = {
729                 .conv = null_conv,
730                 .appdata_ptr = NULL
731         };
732
733         pam_handle_t *handle = NULL;
734         sigset_t ss, old_ss;
735         int pam_code = PAM_SUCCESS;
736         int err;
737         char **e = NULL;
738         bool close_session = false;
739         pid_t pam_pid = 0, parent_pid;
740         int flags = 0;
741
742         assert(name);
743         assert(user);
744         assert(pam_env);
745
746         /* We set up PAM in the parent process, then fork. The child
747          * will then stay around until killed via PR_GET_PDEATHSIG or
748          * systemd via the cgroup logic. It will then remove the PAM
749          * session again. The parent process will exec() the actual
750          * daemon. We do things this way to ensure that the main PID
751          * of the daemon is the one we initially fork()ed. */
752
753         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
754                 flags |= PAM_SILENT;
755
756         pam_code = pam_start(name, user, &conv, &handle);
757         if (pam_code != PAM_SUCCESS) {
758                 handle = NULL;
759                 goto fail;
760         }
761
762         if (tty) {
763                 pam_code = pam_set_item(handle, PAM_TTY, tty);
764                 if (pam_code != PAM_SUCCESS)
765                         goto fail;
766         }
767
768         pam_code = pam_acct_mgmt(handle, flags);
769         if (pam_code != PAM_SUCCESS)
770                 goto fail;
771
772         pam_code = pam_open_session(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         close_session = true;
777
778         e = pam_getenvlist(handle);
779         if (!e) {
780                 pam_code = PAM_BUF_ERR;
781                 goto fail;
782         }
783
784         /* Block SIGTERM, so that we know that it won't get lost in
785          * the child */
786         if (sigemptyset(&ss) < 0 ||
787             sigaddset(&ss, SIGTERM) < 0 ||
788             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
789                 goto fail;
790
791         parent_pid = getpid();
792
793         pam_pid = fork();
794         if (pam_pid < 0)
795                 goto fail;
796
797         if (pam_pid == 0) {
798                 int sig;
799                 int r = EXIT_PAM;
800
801                 /* The child's job is to reset the PAM session on
802                  * termination */
803
804                 /* This string must fit in 10 chars (i.e. the length
805                  * of "/sbin/init"), to look pretty in /bin/ps */
806                 rename_process("(sd-pam)");
807
808                 /* Make sure we don't keep open the passed fds in this
809                 child. We assume that otherwise only those fds are
810                 open here that have been opened by PAM. */
811                 close_many(fds, n_fds);
812
813                 /* Drop privileges - we don't need any to pam_close_session
814                  * and this will make PR_SET_PDEATHSIG work in most cases.
815                  * If this fails, ignore the error - but expect sd-pam threads
816                  * to fail to exit normally */
817                 if (setresuid(uid, uid, uid) < 0)
818                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
819
820                 /* Wait until our parent died. This will only work if
821                  * the above setresuid() succeeds, otherwise the kernel
822                  * will not allow unprivileged parents kill their privileged
823                  * children this way. We rely on the control groups kill logic
824                  * to do the rest for us. */
825                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
826                         goto child_finish;
827
828                 /* Check if our parent process might already have
829                  * died? */
830                 if (getppid() == parent_pid) {
831                         for (;;) {
832                                 if (sigwait(&ss, &sig) < 0) {
833                                         if (errno == EINTR)
834                                                 continue;
835
836                                         goto child_finish;
837                                 }
838
839                                 assert(sig == SIGTERM);
840                                 break;
841                         }
842                 }
843
844                 /* If our parent died we'll end the session */
845                 if (getppid() != parent_pid) {
846                         pam_code = pam_close_session(handle, flags);
847                         if (pam_code != PAM_SUCCESS)
848                                 goto child_finish;
849                 }
850
851                 r = 0;
852
853         child_finish:
854                 pam_end(handle, pam_code | flags);
855                 _exit(r);
856         }
857
858         /* If the child was forked off successfully it will do all the
859          * cleanups, so forget about the handle here. */
860         handle = NULL;
861
862         /* Unblock SIGTERM again in the parent */
863         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
864                 goto fail;
865
866         /* We close the log explicitly here, since the PAM modules
867          * might have opened it, but we don't want this fd around. */
868         closelog();
869
870         *pam_env = e;
871         e = NULL;
872
873         return 0;
874
875 fail:
876         if (pam_code != PAM_SUCCESS) {
877                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
878                 err = -EPERM;  /* PAM errors do not map to errno */
879         } else {
880                 log_error("PAM failed: %m");
881                 err = -errno;
882         }
883
884         if (handle) {
885                 if (close_session)
886                         pam_code = pam_close_session(handle, flags);
887
888                 pam_end(handle, pam_code | flags);
889         }
890
891         strv_free(e);
892
893         closelog();
894
895         if (pam_pid > 1) {
896                 kill(pam_pid, SIGTERM);
897                 kill(pam_pid, SIGCONT);
898         }
899
900         return err;
901 }
902 #endif
903
/* Renames the current process to "(<name>)", where <name> is the last
 * path component of path, cut down to its final 8 characters if longer.
 * Falls back to "(...)" when the path has no usable last component. */
static void rename_process_from_path(const char *path) {
        enum { TAIL_LEN = 8 };
        char process_name[11];
        const char *base;
        char *out = process_name;
        size_t len;

        /* The resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > TAIL_LEN) {
                /* Keep the tail: the end of the name is usually the more
                 * interesting part, since the beginning might just be
                 * "systemd-" */
                base += len - TAIL_LEN;
                len = TAIL_LEN;
        }

        *out++ = '(';
        memcpy(out, base, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
934
935 static int apply_seccomp(uint32_t *syscall_filter) {
936         static const struct sock_filter header[] = {
937                 VALIDATE_ARCHITECTURE,
938                 EXAMINE_SYSCALL
939         };
940         static const struct sock_filter footer[] = {
941                 _KILL_PROCESS
942         };
943
944         int i;
945         unsigned n;
946         struct sock_filter *f;
947         struct sock_fprog prog = {};
948
949         assert(syscall_filter);
950
951         /* First: count the syscalls to check for */
952         for (i = 0, n = 0; i < syscall_max(); i++)
953                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
954                         n++;
955
956         /* Second: build the filter program from a header the syscall
957          * matches and the footer */
958         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
959         memcpy(f, header, sizeof(header));
960
961         for (i = 0, n = 0; i < syscall_max(); i++)
962                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
963                         struct sock_filter item[] = {
964                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
965                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
966                         };
967
968                         assert_cc(ELEMENTSOF(item) == 2);
969
970                         f[ELEMENTSOF(header) + 2*n]  = item[0];
971                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
972
973                         n++;
974                 }
975
976         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
977
978         /* Third: install the filter */
979         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
980         prog.filter = f;
981         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
982                 return -errno;
983
984         return 0;
985 }
986
987 static void do_idle_pipe_dance(int idle_pipe[4]) {
988         assert(idle_pipe);
989
990         if (idle_pipe[1] >= 0)
991                 close_nointr_nofail(idle_pipe[1]);
992         if (idle_pipe[2] >= 0)
993                 close_nointr_nofail(idle_pipe[2]);
994
995         if (idle_pipe[0] >= 0) {
996                 int r;
997
998                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
999
1000                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1001                         /* Signal systemd that we are bored and want to continue. */
1002                         write(idle_pipe[3], "x", 1);
1003
1004                         /* Wait for systemd to react to the signal above. */
1005                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1006                 }
1007
1008                 close_nointr_nofail(idle_pipe[0]);
1009
1010         }
1011
1012         if (idle_pipe[3] >= 0)
1013                 close_nointr_nofail(idle_pipe[3]);
1014 }
1015
1016 static int build_environment(
1017                 ExecContext *c,
1018                 unsigned n_fds,
1019                 usec_t watchdog_usec,
1020                 const char *home,
1021                 const char *username,
1022                 const char *shell,
1023                 char ***ret) {
1024
1025         _cleanup_strv_free_ char **our_env = NULL;
1026         unsigned n_env = 0;
1027         char *x;
1028
1029         assert(c);
1030         assert(ret);
1031
1032         our_env = new0(char*, 10);
1033         if (!our_env)
1034                 return -ENOMEM;
1035
1036         if (n_fds > 0) {
1037                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1038                         return -ENOMEM;
1039                 our_env[n_env++] = x;
1040
1041                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1042                         return -ENOMEM;
1043                 our_env[n_env++] = x;
1044         }
1045
1046         if (watchdog_usec > 0) {
1047                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1048                         return -ENOMEM;
1049                 our_env[n_env++] = x;
1050
1051                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1052                         return -ENOMEM;
1053                 our_env[n_env++] = x;
1054         }
1055
1056         if (home) {
1057                 x = strappend("HOME=", home);
1058                 if (!x)
1059                         return -ENOMEM;
1060                 our_env[n_env++] = x;
1061         }
1062
1063         if (username) {
1064                 x = strappend("LOGNAME=", username);
1065                 if (!x)
1066                         return -ENOMEM;
1067                 our_env[n_env++] = x;
1068
1069                 x = strappend("USER=", username);
1070                 if (!x)
1071                         return -ENOMEM;
1072                 our_env[n_env++] = x;
1073         }
1074
1075         if (shell) {
1076                 x = strappend("SHELL=", shell);
1077                 if (!x)
1078                         return -ENOMEM;
1079                 our_env[n_env++] = x;
1080         }
1081
1082         if (is_terminal_input(c->std_input) ||
1083             c->std_output == EXEC_OUTPUT_TTY ||
1084             c->std_error == EXEC_OUTPUT_TTY ||
1085             c->tty_path) {
1086
1087                 x = strdup(default_term_for_tty(tty_path(c)));
1088                 if (!x)
1089                         return -ENOMEM;
1090                 our_env[n_env++] = x;
1091         }
1092
1093         our_env[n_env++] = NULL;
1094         assert(n_env <= 10);
1095
1096         *ret = our_env;
1097         our_env = NULL;
1098
1099         return 0;
1100 }
1101
1102 int exec_spawn(ExecCommand *command,
1103                char **argv,
1104                ExecContext *context,
1105                int fds[], unsigned n_fds,
1106                char **environment,
1107                bool apply_permissions,
1108                bool apply_chroot,
1109                bool apply_tty_stdin,
1110                bool confirm_spawn,
1111                CGroupControllerMask cgroup_supported,
1112                const char *cgroup_path,
1113                const char *unit_id,
1114                usec_t watchdog_usec,
1115                int idle_pipe[4],
1116                ExecRuntime *runtime,
1117                pid_t *ret) {
1118
1119         _cleanup_strv_free_ char **files_env = NULL;
1120         int socket_fd;
1121         char *line;
1122         pid_t pid;
1123         int r;
1124
1125         assert(command);
1126         assert(context);
1127         assert(ret);
1128         assert(fds || n_fds <= 0);
1129
1130         if (context->std_input == EXEC_INPUT_SOCKET ||
1131             context->std_output == EXEC_OUTPUT_SOCKET ||
1132             context->std_error == EXEC_OUTPUT_SOCKET) {
1133
1134                 if (n_fds != 1)
1135                         return -EINVAL;
1136
1137                 socket_fd = fds[0];
1138
1139                 fds = NULL;
1140                 n_fds = 0;
1141         } else
1142                 socket_fd = -1;
1143
1144         r = exec_context_load_environment(context, &files_env);
1145         if (r < 0) {
1146                 log_struct_unit(LOG_ERR,
1147                            unit_id,
1148                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1149                            "ERRNO=%d", -r,
1150                            NULL);
1151                 return r;
1152         }
1153
1154         if (!argv)
1155                 argv = command->argv;
1156
1157         line = exec_command_line(argv);
1158         if (!line)
1159                 return log_oom();
1160
1161         log_struct_unit(LOG_DEBUG,
1162                         unit_id,
1163                         "EXECUTABLE=%s", command->path,
1164                         "MESSAGE=About to execute: %s", line,
1165                         NULL);
1166         free(line);
1167
1168         pid = fork();
1169         if (pid < 0)
1170                 return -errno;
1171
1172         if (pid == 0) {
1173                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1174                 const char *username = NULL, *home = NULL, *shell = NULL;
1175                 unsigned n_dont_close = 0;
1176                 int dont_close[n_fds + 3];
1177                 uid_t uid = (uid_t) -1;
1178                 gid_t gid = (gid_t) -1;
1179                 sigset_t ss;
1180                 int i, err;
1181
1182                 /* child */
1183
1184                 rename_process_from_path(command->path);
1185
1186                 /* We reset exactly these signals, since they are the
1187                  * only ones we set to SIG_IGN in the main daemon. All
1188                  * others we leave untouched because we set them to
1189                  * SIG_DFL or a valid handler initially, both of which
1190                  * will be demoted to SIG_DFL. */
1191                 default_signals(SIGNALS_CRASH_HANDLER,
1192                                 SIGNALS_IGNORE, -1);
1193
1194                 if (context->ignore_sigpipe)
1195                         ignore_signals(SIGPIPE, -1);
1196
1197                 assert_se(sigemptyset(&ss) == 0);
1198                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1199                         err = -errno;
1200                         r = EXIT_SIGNAL_MASK;
1201                         goto fail_child;
1202                 }
1203
1204                 if (idle_pipe)
1205                         do_idle_pipe_dance(idle_pipe);
1206
1207                 /* Close sockets very early to make sure we don't
1208                  * block init reexecution because it cannot bind its
1209                  * sockets */
1210                 log_forget_fds();
1211
1212                 if (socket_fd >= 0)
1213                         dont_close[n_dont_close++] = socket_fd;
1214                 if (n_fds > 0) {
1215                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1216                         n_dont_close += n_fds;
1217                 }
1218                 if (runtime) {
1219                         if (runtime->netns_storage_socket[0] >= 0)
1220                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1221                         if (runtime->netns_storage_socket[1] >= 0)
1222                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1223                 }
1224
1225                 err = close_all_fds(dont_close, n_dont_close);
1226                 if (err < 0) {
1227                         r = EXIT_FDS;
1228                         goto fail_child;
1229                 }
1230
1231                 if (!context->same_pgrp)
1232                         if (setsid() < 0) {
1233                                 err = -errno;
1234                                 r = EXIT_SETSID;
1235                                 goto fail_child;
1236                         }
1237
1238                 if (context->tcpwrap_name) {
1239                         if (socket_fd >= 0)
1240                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1241                                         err = -EACCES;
1242                                         r = EXIT_TCPWRAP;
1243                                         goto fail_child;
1244                                 }
1245
1246                         for (i = 0; i < (int) n_fds; i++) {
1247                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1248                                         err = -EACCES;
1249                                         r = EXIT_TCPWRAP;
1250                                         goto fail_child;
1251                                 }
1252                         }
1253                 }
1254
1255                 exec_context_tty_reset(context);
1256
1257                 if (confirm_spawn) {
1258                         char response;
1259
1260                         err = ask_for_confirmation(&response, argv);
1261                         if (err == -ETIMEDOUT)
1262                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1263                         else if (err < 0)
1264                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1265                         else if (response == 's') {
1266                                 write_confirm_message("Skipping execution.\n");
1267                                 err = -ECANCELED;
1268                                 r = EXIT_CONFIRM;
1269                                 goto fail_child;
1270                         } else if (response == 'n') {
1271                                 write_confirm_message("Failing execution.\n");
1272                                 err = r = 0;
1273                                 goto fail_child;
1274                         }
1275                 }
1276
1277                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1278                  * must sure to drop O_NONBLOCK */
1279                 if (socket_fd >= 0)
1280                         fd_nonblock(socket_fd, false);
1281
1282                 err = setup_input(context, socket_fd, apply_tty_stdin);
1283                 if (err < 0) {
1284                         r = EXIT_STDIN;
1285                         goto fail_child;
1286                 }
1287
1288                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1289                 if (err < 0) {
1290                         r = EXIT_STDOUT;
1291                         goto fail_child;
1292                 }
1293
1294                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1295                 if (err < 0) {
1296                         r = EXIT_STDERR;
1297                         goto fail_child;
1298                 }
1299
1300                 if (cgroup_path) {
1301                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1302                         if (err < 0) {
1303                                 r = EXIT_CGROUP;
1304                                 goto fail_child;
1305                         }
1306                 }
1307
1308                 if (context->oom_score_adjust_set) {
1309                         char t[16];
1310
1311                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1312                         char_array_0(t);
1313
1314                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1315                                 err = -errno;
1316                                 r = EXIT_OOM_ADJUST;
1317                                 goto fail_child;
1318                         }
1319                 }
1320
1321                 if (context->nice_set)
1322                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1323                                 err = -errno;
1324                                 r = EXIT_NICE;
1325                                 goto fail_child;
1326                         }
1327
1328                 if (context->cpu_sched_set) {
1329                         struct sched_param param = {
1330                                 .sched_priority = context->cpu_sched_priority,
1331                         };
1332
1333                         r = sched_setscheduler(0,
1334                                                context->cpu_sched_policy |
1335                                                (context->cpu_sched_reset_on_fork ?
1336                                                 SCHED_RESET_ON_FORK : 0),
1337                                                &param);
1338                         if (r < 0) {
1339                                 err = -errno;
1340                                 r = EXIT_SETSCHEDULER;
1341                                 goto fail_child;
1342                         }
1343                 }
1344
1345                 if (context->cpuset)
1346                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1347                                 err = -errno;
1348                                 r = EXIT_CPUAFFINITY;
1349                                 goto fail_child;
1350                         }
1351
1352                 if (context->ioprio_set)
1353                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1354                                 err = -errno;
1355                                 r = EXIT_IOPRIO;
1356                                 goto fail_child;
1357                         }
1358
1359                 if (context->timer_slack_nsec != (nsec_t) -1)
1360                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1361                                 err = -errno;
1362                                 r = EXIT_TIMERSLACK;
1363                                 goto fail_child;
1364                         }
1365
1366                 if (context->utmp_id)
1367                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1368
1369                 if (context->user) {
1370                         username = context->user;
1371                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1372                         if (err < 0) {
1373                                 r = EXIT_USER;
1374                                 goto fail_child;
1375                         }
1376
1377                         if (is_terminal_input(context->std_input)) {
1378                                 err = chown_terminal(STDIN_FILENO, uid);
1379                                 if (err < 0) {
1380                                         r = EXIT_STDIN;
1381                                         goto fail_child;
1382                                 }
1383                         }
1384                 }
1385
1386 #ifdef HAVE_PAM
1387                 if (cgroup_path && context->user && context->pam_name) {
1388                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1389                         if (err < 0) {
1390                                 r = EXIT_CGROUP;
1391                                 goto fail_child;
1392                         }
1393
1394
1395                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1396                         if (err < 0) {
1397                                 r = EXIT_CGROUP;
1398                                 goto fail_child;
1399                         }
1400                 }
1401 #endif
1402
1403                 if (apply_permissions) {
1404                         err = enforce_groups(context, username, gid);
1405                         if (err < 0) {
1406                                 r = EXIT_GROUP;
1407                                 goto fail_child;
1408                         }
1409                 }
1410
1411                 umask(context->umask);
1412
1413 #ifdef HAVE_PAM
1414                 if (apply_permissions && context->pam_name && username) {
1415                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1416                         if (err < 0) {
1417                                 r = EXIT_PAM;
1418                                 goto fail_child;
1419                         }
1420                 }
1421 #endif
1422                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1423                         err = setup_netns(runtime->netns_storage_socket);
1424                         if (err < 0) {
1425                                 r = EXIT_NETWORK;
1426                                 goto fail_child;
1427                         }
1428                 }
1429
1430                 if (!strv_isempty(context->read_write_dirs) ||
1431                     !strv_isempty(context->read_only_dirs) ||
1432                     !strv_isempty(context->inaccessible_dirs) ||
1433                     context->mount_flags != 0 ||
1434                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1435                     context->private_devices) {
1436
1437                         char *tmp = NULL, *var = NULL;
1438
1439                         /* The runtime struct only contains the parent
1440                          * of the private /tmp, which is
1441                          * non-accessible to world users. Inside of it
1442                          * there's a /tmp that is sticky, and that's
1443                          * the one we want to use here. */
1444
1445                         if (context->private_tmp && runtime) {
1446                                 if (runtime->tmp_dir)
1447                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1448                                 if (runtime->var_tmp_dir)
1449                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1450                         }
1451
1452                         err = setup_namespace(
1453                                         context->read_write_dirs,
1454                                         context->read_only_dirs,
1455                                         context->inaccessible_dirs,
1456                                         tmp,
1457                                         var,
1458                                         context->private_devices,
1459                                         context->mount_flags);
1460
1461                         if (err < 0) {
1462                                 r = EXIT_NAMESPACE;
1463                                 goto fail_child;
1464                         }
1465                 }
1466
1467                 if (apply_chroot) {
1468                         if (context->root_directory)
1469                                 if (chroot(context->root_directory) < 0) {
1470                                         err = -errno;
1471                                         r = EXIT_CHROOT;
1472                                         goto fail_child;
1473                                 }
1474
1475                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1476                                 err = -errno;
1477                                 r = EXIT_CHDIR;
1478                                 goto fail_child;
1479                         }
1480                 } else {
1481                         _cleanup_free_ char *d = NULL;
1482
1483                         if (asprintf(&d, "%s/%s",
1484                                      context->root_directory ? context->root_directory : "",
1485                                      context->working_directory ? context->working_directory : "") < 0) {
1486                                 err = -ENOMEM;
1487                                 r = EXIT_MEMORY;
1488                                 goto fail_child;
1489                         }
1490
1491                         if (chdir(d) < 0) {
1492                                 err = -errno;
1493                                 r = EXIT_CHDIR;
1494                                 goto fail_child;
1495                         }
1496                 }
1497
1498                 /* We repeat the fd closing here, to make sure that
1499                  * nothing is leaked from the PAM modules */
1500                 err = close_all_fds(fds, n_fds);
1501                 if (err >= 0)
1502                         err = shift_fds(fds, n_fds);
1503                 if (err >= 0)
1504                         err = flags_fds(fds, n_fds, context->non_blocking);
1505                 if (err < 0) {
1506                         r = EXIT_FDS;
1507                         goto fail_child;
1508                 }
1509
1510                 if (apply_permissions) {
1511
1512                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1513                                 if (!context->rlimit[i])
1514                                         continue;
1515
1516                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1517                                         err = -errno;
1518                                         r = EXIT_LIMITS;
1519                                         goto fail_child;
1520                                 }
1521                         }
1522
1523                         if (context->capability_bounding_set_drop) {
1524                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1525                                 if (err < 0) {
1526                                         r = EXIT_CAPABILITIES;
1527                                         goto fail_child;
1528                                 }
1529                         }
1530
1531                         if (context->user) {
1532                                 err = enforce_user(context, uid);
1533                                 if (err < 0) {
1534                                         r = EXIT_USER;
1535                                         goto fail_child;
1536                                 }
1537                         }
1538
1539                         /* PR_GET_SECUREBITS is not privileged, while
1540                          * PR_SET_SECUREBITS is. So to suppress
1541                          * potential EPERMs we'll try not to call
1542                          * PR_SET_SECUREBITS unless necessary. */
1543                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1544                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1545                                         err = -errno;
1546                                         r = EXIT_SECUREBITS;
1547                                         goto fail_child;
1548                                 }
1549
1550                         if (context->capabilities)
1551                                 if (cap_set_proc(context->capabilities) < 0) {
1552                                         err = -errno;
1553                                         r = EXIT_CAPABILITIES;
1554                                         goto fail_child;
1555                                 }
1556
1557                         if (context->no_new_privileges)
1558                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1559                                         err = -errno;
1560                                         r = EXIT_NO_NEW_PRIVILEGES;
1561                                         goto fail_child;
1562                                 }
1563
1564                         if (context->syscall_filter) {
1565                                 err = apply_seccomp(context->syscall_filter);
1566                                 if (err < 0) {
1567                                         r = EXIT_SECCOMP;
1568                                         goto fail_child;
1569                                 }
1570                         }
1571 #ifdef HAVE_SELINUX
1572                         if (context->selinux_context && use_selinux()) {
1573                                 err = security_check_context(context->selinux_context);
1574                                 if (err < 0) {
1575                                         r = EXIT_SELINUX_CONTEXT;
1576                                         goto fail_child;
1577                                 }
1578                                 err = setexeccon(context->selinux_context);
1579                                 if (err < 0) {
1580                                         r = EXIT_SELINUX_CONTEXT;
1581                                         goto fail_child;
1582                                 }
1583                         }
1584 #endif
1585                 }
1586
1587                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1588                 if (r < 0) {
1589                         r = EXIT_MEMORY;
1590                         goto fail_child;
1591                 }
1592
1593                 final_env = strv_env_merge(5,
1594                                            environment,
1595                                            our_env,
1596                                            context->environment,
1597                                            files_env,
1598                                            pam_env,
1599                                            NULL);
1600                 if (!final_env) {
1601                         err = -ENOMEM;
1602                         r = EXIT_MEMORY;
1603                         goto fail_child;
1604                 }
1605
1606                 final_argv = replace_env_argv(argv, final_env);
1607                 if (!final_argv) {
1608                         err = -ENOMEM;
1609                         r = EXIT_MEMORY;
1610                         goto fail_child;
1611                 }
1612
1613                 final_env = strv_env_clean(final_env);
1614
1615                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1616                         line = exec_command_line(final_argv);
1617                         if (line) {
1618                                 log_open();
1619                                 log_struct_unit(LOG_DEBUG,
1620                                                 unit_id,
1621                                                 "EXECUTABLE=%s", command->path,
1622                                                 "MESSAGE=Executing: %s", line,
1623                                                 NULL);
1624                                 log_close();
1625                                 free(line);
1626                                 line = NULL;
1627                         }
1628                 }
1629                 execve(command->path, final_argv, final_env);
1630                 err = -errno;
1631                 r = EXIT_EXEC;
1632
1633         fail_child:
1634                 if (r != 0) {
1635                         log_open();
1636                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1637                                    "EXECUTABLE=%s", command->path,
1638                                    "MESSAGE=Failed at step %s spawning %s: %s",
1639                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1640                                           command->path, strerror(-err),
1641                                    "ERRNO=%d", -err,
1642                                    NULL);
1643                         log_close();
1644                 }
1645
1646                 _exit(r);
1647         }
1648
1649         log_struct_unit(LOG_DEBUG,
1650                         unit_id,
1651                         "MESSAGE=Forked %s as "PID_FMT,
1652                         command->path, pid,
1653                         NULL);
1654
1655         /* We add the new process to the cgroup both in the child (so
1656          * that we can be sure that no user code is ever executed
1657          * outside of the cgroup) and in the parent (so that we can be
1658          * sure that when we kill the cgroup the process will be
1659          * killed too). */
1660         if (cgroup_path)
1661                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1662
1663         exec_status_start(&command->exec_status, pid);
1664
1665         *ret = pid;
1666         return 0;
1667 }
1668
1669 void exec_context_init(ExecContext *c) {
1670         assert(c);
1671
1672         c->umask = 0022;
1673         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1674         c->cpu_sched_policy = SCHED_OTHER;
1675         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1676         c->syslog_level_prefix = true;
1677         c->ignore_sigpipe = true;
1678         c->timer_slack_nsec = (nsec_t) -1;
1679 }
1680
1681 void exec_context_done(ExecContext *c) {
1682         unsigned l;
1683
1684         assert(c);
1685
1686         strv_free(c->environment);
1687         c->environment = NULL;
1688
1689         strv_free(c->environment_files);
1690         c->environment_files = NULL;
1691
1692         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1693                 free(c->rlimit[l]);
1694                 c->rlimit[l] = NULL;
1695         }
1696
1697         free(c->working_directory);
1698         c->working_directory = NULL;
1699         free(c->root_directory);
1700         c->root_directory = NULL;
1701
1702         free(c->tty_path);
1703         c->tty_path = NULL;
1704
1705         free(c->tcpwrap_name);
1706         c->tcpwrap_name = NULL;
1707
1708         free(c->syslog_identifier);
1709         c->syslog_identifier = NULL;
1710
1711         free(c->user);
1712         c->user = NULL;
1713
1714         free(c->group);
1715         c->group = NULL;
1716
1717         strv_free(c->supplementary_groups);
1718         c->supplementary_groups = NULL;
1719
1720         free(c->pam_name);
1721         c->pam_name = NULL;
1722
1723         if (c->capabilities) {
1724                 cap_free(c->capabilities);
1725                 c->capabilities = NULL;
1726         }
1727
1728         strv_free(c->read_only_dirs);
1729         c->read_only_dirs = NULL;
1730
1731         strv_free(c->read_write_dirs);
1732         c->read_write_dirs = NULL;
1733
1734         strv_free(c->inaccessible_dirs);
1735         c->inaccessible_dirs = NULL;
1736
1737         if (c->cpuset)
1738                 CPU_FREE(c->cpuset);
1739
1740         free(c->utmp_id);
1741         c->utmp_id = NULL;
1742
1743         free(c->selinux_context);
1744         c->selinux_context = NULL;
1745
1746         free(c->syscall_filter);
1747         c->syscall_filter = NULL;
1748 }
1749
1750 void exec_command_done(ExecCommand *c) {
1751         assert(c);
1752
1753         free(c->path);
1754         c->path = NULL;
1755
1756         strv_free(c->argv);
1757         c->argv = NULL;
1758 }
1759
1760 void exec_command_done_array(ExecCommand *c, unsigned n) {
1761         unsigned i;
1762
1763         for (i = 0; i < n; i++)
1764                 exec_command_done(c+i);
1765 }
1766
1767 void exec_command_free_list(ExecCommand *c) {
1768         ExecCommand *i;
1769
1770         while ((i = c)) {
1771                 LIST_REMOVE(command, c, i);
1772                 exec_command_done(i);
1773                 free(i);
1774         }
1775 }
1776
1777 void exec_command_free_array(ExecCommand **c, unsigned n) {
1778         unsigned i;
1779
1780         for (i = 0; i < n; i++) {
1781                 exec_command_free_list(c[i]);
1782                 c[i] = NULL;
1783         }
1784 }
1785
1786 int exec_context_load_environment(const ExecContext *c, char ***l) {
1787         char **i, **r = NULL;
1788
1789         assert(c);
1790         assert(l);
1791
1792         STRV_FOREACH(i, c->environment_files) {
1793                 char *fn;
1794                 int k;
1795                 bool ignore = false;
1796                 char **p;
1797                 _cleanup_globfree_ glob_t pglob = {};
1798                 int count, n;
1799
1800                 fn = *i;
1801
1802                 if (fn[0] == '-') {
1803                         ignore = true;
1804                         fn ++;
1805                 }
1806
1807                 if (!path_is_absolute(fn)) {
1808                         if (ignore)
1809                                 continue;
1810
1811                         strv_free(r);
1812                         return -EINVAL;
1813                 }
1814
1815                 /* Filename supports globbing, take all matching files */
1816                 errno = 0;
1817                 if (glob(fn, 0, NULL, &pglob) != 0) {
1818                         if (ignore)
1819                                 continue;
1820
1821                         strv_free(r);
1822                         return errno ? -errno : -EINVAL;
1823                 }
1824                 count = pglob.gl_pathc;
1825                 if (count == 0) {
1826                         if (ignore)
1827                                 continue;
1828
1829                         strv_free(r);
1830                         return -EINVAL;
1831                 }
1832                 for (n = 0; n < count; n++) {
1833                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1834                         if (k < 0) {
1835                                 if (ignore)
1836                                         continue;
1837
1838                                 strv_free(r);
1839                                 return k;
1840                         }
1841                         /* Log invalid environment variables with filename */
1842                         if (p)
1843                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1844
1845                         if (r == NULL)
1846                                 r = p;
1847                         else {
1848                                 char **m;
1849
1850                                 m = strv_env_merge(2, r, p);
1851                                 strv_free(r);
1852                                 strv_free(p);
1853                                 if (!m)
1854                                         return -ENOMEM;
1855
1856                                 r = m;
1857                         }
1858                 }
1859         }
1860
1861         *l = r;
1862
1863         return 0;
1864 }
1865
/* Tells whether the given TTY (with or without a leading "/dev/") may
 * be the device behind /dev/console. Errs on the side of "yes" when
 * the console cannot be resolved. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        /* Compare bare device names only */
        if (startswith(tty, "/dev/"))
                tty += 5;

        /* trivial identity? */
        if (streq(tty, "console"))
                return true;

        /* if we could not resolve, assume it may */
        console = resolve_dev_console(&active);
        if (!console)
                return true;

        if (streq(console, tty))
                match = true;
        else
                /* "tty0" means the active VC, so it may be the same sometimes */
                match = streq(console, "tty0") && tty_is_vc(tty);

        /* Only the buffer handed back through "active" is released here */
        free(active);

        return match;
}
1888
1889 bool exec_context_may_touch_console(ExecContext *ec) {
1890         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1891                 is_terminal_input(ec->std_input) ||
1892                 is_terminal_output(ec->std_output) ||
1893                 is_terminal_output(ec->std_error)) &&
1894                tty_may_match_dev_console(tty_path(ec));
1895 }
1896
1897 static void strv_fprintf(FILE *f, char **l) {
1898         char **g;
1899
1900         assert(f);
1901
1902         STRV_FOREACH(g, l)
1903                 fprintf(f, " %s", *g);
1904 }
1905
1906 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1907         char **e;
1908         unsigned i;
1909
1910         assert(c);
1911         assert(f);
1912
1913         prefix = strempty(prefix);
1914
1915         fprintf(f,
1916                 "%sUMask: %04o\n"
1917                 "%sWorkingDirectory: %s\n"
1918                 "%sRootDirectory: %s\n"
1919                 "%sNonBlocking: %s\n"
1920                 "%sPrivateTmp: %s\n"
1921                 "%sPrivateNetwork: %s\n"
1922                 "%sPrivateDevices: %s\n"
1923                 "%sIgnoreSIGPIPE: %s\n",
1924                 prefix, c->umask,
1925                 prefix, c->working_directory ? c->working_directory : "/",
1926                 prefix, c->root_directory ? c->root_directory : "/",
1927                 prefix, yes_no(c->non_blocking),
1928                 prefix, yes_no(c->private_tmp),
1929                 prefix, yes_no(c->private_network),
1930                 prefix, yes_no(c->private_devices),
1931                 prefix, yes_no(c->ignore_sigpipe));
1932
1933         STRV_FOREACH(e, c->environment)
1934                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1935
1936         STRV_FOREACH(e, c->environment_files)
1937                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1938
1939         if (c->tcpwrap_name)
1940                 fprintf(f,
1941                         "%sTCPWrapName: %s\n",
1942                         prefix, c->tcpwrap_name);
1943
1944         if (c->nice_set)
1945                 fprintf(f,
1946                         "%sNice: %i\n",
1947                         prefix, c->nice);
1948
1949         if (c->oom_score_adjust_set)
1950                 fprintf(f,
1951                         "%sOOMScoreAdjust: %i\n",
1952                         prefix, c->oom_score_adjust);
1953
1954         for (i = 0; i < RLIM_NLIMITS; i++)
1955                 if (c->rlimit[i])
1956                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1957
1958         if (c->ioprio_set) {
1959                 char *class_str;
1960                 int r;
1961
1962                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1963                 if (r < 0)
1964                         class_str = NULL;
1965                 fprintf(f,
1966                         "%sIOSchedulingClass: %s\n"
1967                         "%sIOPriority: %i\n",
1968                         prefix, strna(class_str),
1969                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1970                 free(class_str);
1971         }
1972
1973         if (c->cpu_sched_set) {
1974                 char *policy_str;
1975                 int r;
1976
1977                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1978                 if (r < 0)
1979                         policy_str = NULL;
1980                 fprintf(f,
1981                         "%sCPUSchedulingPolicy: %s\n"
1982                         "%sCPUSchedulingPriority: %i\n"
1983                         "%sCPUSchedulingResetOnFork: %s\n",
1984                         prefix, strna(policy_str),
1985                         prefix, c->cpu_sched_priority,
1986                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1987                 free(policy_str);
1988         }
1989
1990         if (c->cpuset) {
1991                 fprintf(f, "%sCPUAffinity:", prefix);
1992                 for (i = 0; i < c->cpuset_ncpus; i++)
1993                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1994                                 fprintf(f, " %u", i);
1995                 fputs("\n", f);
1996         }
1997
1998         if (c->timer_slack_nsec != (nsec_t) -1)
1999                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2000
2001         fprintf(f,
2002                 "%sStandardInput: %s\n"
2003                 "%sStandardOutput: %s\n"
2004                 "%sStandardError: %s\n",
2005                 prefix, exec_input_to_string(c->std_input),
2006                 prefix, exec_output_to_string(c->std_output),
2007                 prefix, exec_output_to_string(c->std_error));
2008
2009         if (c->tty_path)
2010                 fprintf(f,
2011                         "%sTTYPath: %s\n"
2012                         "%sTTYReset: %s\n"
2013                         "%sTTYVHangup: %s\n"
2014                         "%sTTYVTDisallocate: %s\n",
2015                         prefix, c->tty_path,
2016                         prefix, yes_no(c->tty_reset),
2017                         prefix, yes_no(c->tty_vhangup),
2018                         prefix, yes_no(c->tty_vt_disallocate));
2019
2020         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2021             c->std_output == EXEC_OUTPUT_KMSG ||
2022             c->std_output == EXEC_OUTPUT_JOURNAL ||
2023             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2024             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2025             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2026             c->std_error == EXEC_OUTPUT_SYSLOG ||
2027             c->std_error == EXEC_OUTPUT_KMSG ||
2028             c->std_error == EXEC_OUTPUT_JOURNAL ||
2029             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2030             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2031             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2032
2033                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2034
2035                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2036                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2037
2038                 fprintf(f,
2039                         "%sSyslogFacility: %s\n"
2040                         "%sSyslogLevel: %s\n",
2041                         prefix, strna(fac_str),
2042                         prefix, strna(lvl_str));
2043         }
2044
2045         if (c->capabilities) {
2046                 _cleanup_cap_free_charp_ char *t;
2047
2048                 t = cap_to_text(c->capabilities, NULL);
2049                 if (t)
2050                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2051         }
2052
2053         if (c->secure_bits)
2054                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2055                         prefix,
2056                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2057                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2058                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2059                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2060                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2061                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2062
2063         if (c->capability_bounding_set_drop) {
2064                 unsigned long l;
2065                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2066
2067                 for (l = 0; l <= cap_last_cap(); l++)
2068                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2069                                 _cleanup_cap_free_charp_ char *t;
2070
2071                                 t = cap_to_name(l);
2072                                 if (t)
2073                                         fprintf(f, " %s", t);
2074                         }
2075
2076                 fputs("\n", f);
2077         }
2078
2079         if (c->user)
2080                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2081         if (c->group)
2082                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2083
2084         if (strv_length(c->supplementary_groups) > 0) {
2085                 fprintf(f, "%sSupplementaryGroups:", prefix);
2086                 strv_fprintf(f, c->supplementary_groups);
2087                 fputs("\n", f);
2088         }
2089
2090         if (c->pam_name)
2091                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2092
2093         if (strv_length(c->read_write_dirs) > 0) {
2094                 fprintf(f, "%sReadWriteDirs:", prefix);
2095                 strv_fprintf(f, c->read_write_dirs);
2096                 fputs("\n", f);
2097         }
2098
2099         if (strv_length(c->read_only_dirs) > 0) {
2100                 fprintf(f, "%sReadOnlyDirs:", prefix);
2101                 strv_fprintf(f, c->read_only_dirs);
2102                 fputs("\n", f);
2103         }
2104
2105         if (strv_length(c->inaccessible_dirs) > 0) {
2106                 fprintf(f, "%sInaccessibleDirs:", prefix);
2107                 strv_fprintf(f, c->inaccessible_dirs);
2108                 fputs("\n", f);
2109         }
2110
2111         if (c->utmp_id)
2112                 fprintf(f,
2113                         "%sUtmpIdentifier: %s\n",
2114                         prefix, c->utmp_id);
2115
2116         if (c->selinux_context)
2117                 fprintf(f,
2118                         "%sSELinuxContext: %s\n",
2119                         prefix, c->selinux_context);
2120
2121 }
2122
2123 void exec_status_start(ExecStatus *s, pid_t pid) {
2124         assert(s);
2125
2126         zero(*s);
2127         s->pid = pid;
2128         dual_timestamp_get(&s->start_timestamp);
2129 }
2130
2131 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2132         assert(s);
2133
2134         if (s->pid && s->pid != pid)
2135                 zero(*s);
2136
2137         s->pid = pid;
2138         dual_timestamp_get(&s->exit_timestamp);
2139
2140         s->code = code;
2141         s->status = status;
2142
2143         if (context) {
2144                 if (context->utmp_id)
2145                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2146
2147                 exec_context_tty_reset(context);
2148         }
2149 }
2150
2151 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2152         char buf[FORMAT_TIMESTAMP_MAX];
2153
2154         assert(s);
2155         assert(f);
2156
2157         if (!prefix)
2158                 prefix = "";
2159
2160         if (s->pid <= 0)
2161                 return;
2162
2163         fprintf(f,
2164                 "%sPID: "PID_FMT"\n",
2165                 prefix, s->pid);
2166
2167         if (s->start_timestamp.realtime > 0)
2168                 fprintf(f,
2169                         "%sStart Timestamp: %s\n",
2170                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2171
2172         if (s->exit_timestamp.realtime > 0)
2173                 fprintf(f,
2174                         "%sExit Timestamp: %s\n"
2175                         "%sExit Code: %s\n"
2176                         "%sExit Status: %i\n",
2177                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2178                         prefix, sigchld_code_to_string(s->code),
2179                         prefix, s->status);
2180 }
2181
2182 char *exec_command_line(char **argv) {
2183         size_t k;
2184         char *n, *p, **a;
2185         bool first = true;
2186
2187         assert(argv);
2188
2189         k = 1;
2190         STRV_FOREACH(a, argv)
2191                 k += strlen(*a)+3;
2192
2193         if (!(n = new(char, k)))
2194                 return NULL;
2195
2196         p = n;
2197         STRV_FOREACH(a, argv) {
2198
2199                 if (!first)
2200                         *(p++) = ' ';
2201                 else
2202                         first = false;
2203
2204                 if (strpbrk(*a, WHITESPACE)) {
2205                         *(p++) = '\'';
2206                         p = stpcpy(p, *a);
2207                         *(p++) = '\'';
2208                 } else
2209                         p = stpcpy(p, *a);
2210
2211         }
2212
2213         *p = 0;
2214
2215         /* FIXME: this doesn't really handle arguments that have
2216          * spaces and ticks in them */
2217
2218         return n;
2219 }
2220
2221 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2222         char *p2;
2223         const char *prefix2;
2224
2225         char *cmd;
2226
2227         assert(c);
2228         assert(f);
2229
2230         if (!prefix)
2231                 prefix = "";
2232         p2 = strappend(prefix, "\t");
2233         prefix2 = p2 ? p2 : prefix;
2234
2235         cmd = exec_command_line(c->argv);
2236
2237         fprintf(f,
2238                 "%sCommand Line: %s\n",
2239                 prefix, cmd ? cmd : strerror(ENOMEM));
2240
2241         free(cmd);
2242
2243         exec_status_dump(&c->exec_status, f, prefix2);
2244
2245         free(p2);
2246 }
2247
2248 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2249         assert(f);
2250
2251         if (!prefix)
2252                 prefix = "";
2253
2254         LIST_FOREACH(command, c, c)
2255                 exec_command_dump(c, f, prefix);
2256 }
2257
2258 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2259         ExecCommand *end;
2260
2261         assert(l);
2262         assert(e);
2263
2264         if (*l) {
2265                 /* It's kind of important, that we keep the order here */
2266                 LIST_FIND_TAIL(command, *l, end);
2267                 LIST_INSERT_AFTER(command, *l, end, e);
2268         } else
2269               *l = e;
2270 }
2271
2272 int exec_command_set(ExecCommand *c, const char *path, ...) {
2273         va_list ap;
2274         char **l, *p;
2275
2276         assert(c);
2277         assert(path);
2278
2279         va_start(ap, path);
2280         l = strv_new_ap(path, ap);
2281         va_end(ap);
2282
2283         if (!l)
2284                 return -ENOMEM;
2285
2286         p = strdup(path);
2287         if (!p) {
2288                 strv_free(l);
2289                 return -ENOMEM;
2290         }
2291
2292         free(c->path);
2293         c->path = p;
2294
2295         strv_free(c->argv);
2296         c->argv = l;
2297
2298         return 0;
2299 }
2300
2301 static int exec_runtime_allocate(ExecRuntime **rt) {
2302
2303         if (*rt)
2304                 return 0;
2305
2306         *rt = new0(ExecRuntime, 1);
2307         if (!*rt)
2308                 return -ENOMEM;
2309
2310         (*rt)->n_ref = 1;
2311         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2312
2313         return 0;
2314 }
2315
2316 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2317         int r;
2318
2319         assert(rt);
2320         assert(c);
2321         assert(id);
2322
2323         if (*rt)
2324                 return 1;
2325
2326         if (!c->private_network && !c->private_tmp)
2327                 return 0;
2328
2329         r = exec_runtime_allocate(rt);
2330         if (r < 0)
2331                 return r;
2332
2333         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2334                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2335                         return -errno;
2336         }
2337
2338         if (c->private_tmp && !(*rt)->tmp_dir) {
2339                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2340                 if (r < 0)
2341                         return r;
2342         }
2343
2344         return 1;
2345 }
2346
2347 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2348         assert(r);
2349         assert(r->n_ref > 0);
2350
2351         r->n_ref++;
2352         return r;
2353 }
2354
2355 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2356
2357         if (!r)
2358                 return NULL;
2359
2360         assert(r->n_ref > 0);
2361
2362         r->n_ref--;
2363         if (r->n_ref <= 0) {
2364                 free(r->tmp_dir);
2365                 free(r->var_tmp_dir);
2366                 close_pipe(r->netns_storage_socket);
2367                 free(r);
2368         }
2369
2370         return NULL;
2371 }
2372
2373 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2374         assert(u);
2375         assert(f);
2376         assert(fds);
2377
2378         if (!rt)
2379                 return 0;
2380
2381         if (rt->tmp_dir)
2382                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2383
2384         if (rt->var_tmp_dir)
2385                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2386
2387         if (rt->netns_storage_socket[0] >= 0) {
2388                 int copy;
2389
2390                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2391                 if (copy < 0)
2392                         return copy;
2393
2394                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2395         }
2396
2397         if (rt->netns_storage_socket[1] >= 0) {
2398                 int copy;
2399
2400                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2401                 if (copy < 0)
2402                         return copy;
2403
2404                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2405         }
2406
2407         return 0;
2408 }
2409
2410 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2411         int r;
2412
2413         assert(rt);
2414         assert(key);
2415         assert(value);
2416
2417         if (streq(key, "tmp-dir")) {
2418                 char *copy;
2419
2420                 r = exec_runtime_allocate(rt);
2421                 if (r < 0)
2422                         return r;
2423
2424                 copy = strdup(value);
2425                 if (!copy)
2426                         return log_oom();
2427
2428                 free((*rt)->tmp_dir);
2429                 (*rt)->tmp_dir = copy;
2430
2431         } else if (streq(key, "var-tmp-dir")) {
2432                 char *copy;
2433
2434                 r = exec_runtime_allocate(rt);
2435                 if (r < 0)
2436                         return r;
2437
2438                 copy = strdup(value);
2439                 if (!copy)
2440                         return log_oom();
2441
2442                 free((*rt)->var_tmp_dir);
2443                 (*rt)->var_tmp_dir = copy;
2444
2445         } else if (streq(key, "netns-socket-0")) {
2446                 int fd;
2447
2448                 r = exec_runtime_allocate(rt);
2449                 if (r < 0)
2450                         return r;
2451
2452                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2453                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2454                 else {
2455                         if ((*rt)->netns_storage_socket[0] >= 0)
2456                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2457
2458                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2459                 }
2460         } else if (streq(key, "netns-socket-1")) {
2461                 int fd;
2462
2463                 r = exec_runtime_allocate(rt);
2464                 if (r < 0)
2465                         return r;
2466
2467                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2468                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2469                 else {
2470                         if ((*rt)->netns_storage_socket[1] >= 0)
2471                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2472
2473                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2474                 }
2475         } else
2476                 return 0;
2477
2478         return 1;
2479 }
2480
/* Thread entry point: removes the directory tree whose heap-allocated
 * path is passed in p, then frees the path string. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2487
2488 void exec_runtime_destroy(ExecRuntime *rt) {
2489         if (!rt)
2490                 return;
2491
2492         /* If there are multiple users of this, let's leave the stuff around */
2493         if (rt->n_ref > 1)
2494                 return;
2495
2496         if (rt->tmp_dir) {
2497                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2498                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2499                 rt->tmp_dir = NULL;
2500         }
2501
2502         if (rt->var_tmp_dir) {
2503                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2504                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2505                 rt->var_tmp_dir = NULL;
2506         }
2507
2508         close_pipe(rt->netns_storage_socket);
2509 }
2510
2511 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2512         [EXEC_INPUT_NULL] = "null",
2513         [EXEC_INPUT_TTY] = "tty",
2514         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2515         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2516         [EXEC_INPUT_SOCKET] = "socket"
2517 };
2518
2519 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2520
2521 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2522         [EXEC_OUTPUT_INHERIT] = "inherit",
2523         [EXEC_OUTPUT_NULL] = "null",
2524         [EXEC_OUTPUT_TTY] = "tty",
2525         [EXEC_OUTPUT_SYSLOG] = "syslog",
2526         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2527         [EXEC_OUTPUT_KMSG] = "kmsg",
2528         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2529         [EXEC_OUTPUT_JOURNAL] = "journal",
2530         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2531         [EXEC_OUTPUT_SOCKET] = "socket"
2532 };
2533
2534 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);