chiark / gitweb /
3376adc34aaded0d5a63a8b45346e609ae611959
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71
72 /* This assumes there is a 'tty' group */
73 #define TTY_MODE 0620
74
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3. The array is updated in place with the new
 * descriptor numbers. Returns 0 on success, or a negative errno value
 * if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first_pending = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep sweeping over the array until a full pass manages to
         * place every descriptor on its intended slot. */
        while (first_pending >= 0) {
                int idx, retry_at = -1;

                for (idx = first_pending; idx < (int) n_fds; idx++) {
                        int target = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the slot is already right */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD gave us a higher number because the
                         * wanted one is still taken: remember the first
                         * such index, the next sweep starts from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first_pending = retry_at;
        }

        return 0;
}
118
/* Applies the requested O_NONBLOCK state to each of the passed file
 * descriptors and clears FD_CLOEXEC on all of them. Returns 0 on
 * success or the first negative error reported by the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* These fds are meant to be handed to our children,
                 * hence close-on-exec is always turned off. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
/* Opens /dev/null with the given access flags and makes it available
 * as file descriptor nfd. Returns nfd on success, a negative errno
 * value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have handed out the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(null_fd);

        return result;
}
184
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
186         int fd, r;
187         union sockaddr_union sa;
188
189         assert(context);
190         assert(output < _EXEC_OUTPUT_MAX);
191         assert(ident);
192         assert(nfd >= 0);
193
194         fd = socket(AF_UNIX, SOCK_STREAM, 0);
195         if (fd < 0)
196                 return -errno;
197
198         zero(sa);
199         sa.un.sun_family = AF_UNIX;
200         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
201
202         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
203         if (r < 0) {
204                 close_nointr_nofail(fd);
205                 return -errno;
206         }
207
208         if (shutdown(fd, SHUT_RD) < 0) {
209                 close_nointr_nofail(fd);
210                 return -errno;
211         }
212
213         dprintf(fd,
214                 "%s\n"
215                 "%s\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n"
219                 "%i\n"
220                 "%i\n",
221                 context->syslog_identifier ? context->syslog_identifier : ident,
222                 unit_id,
223                 context->syslog_priority,
224                 !!context->syslog_level_prefix,
225                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
228
229         if (fd != nfd) {
230                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231                 close_nointr_nofail(fd);
232         } else
233                 r = nfd;
234
235         return r;
236 }
/* Opens the terminal at the given path (never as controlling TTY) and
 * makes it available as file descriptor nfd. Returns nfd on success,
 * or a negative error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already on the requested descriptor number? */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(tty_fd);

        return result;
}
254
255 static bool is_terminal_input(ExecInput i) {
256         return
257                 i == EXEC_INPUT_TTY ||
258                 i == EXEC_INPUT_TTY_FORCE ||
259                 i == EXEC_INPUT_TTY_FAIL;
260 }
261
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
263
264         if (is_terminal_input(std_input) && !apply_tty_stdin)
265                 return EXEC_INPUT_NULL;
266
267         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268                 return EXEC_INPUT_NULL;
269
270         return std_input;
271 }
272
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
274
275         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276                 return EXEC_OUTPUT_INHERIT;
277
278         return std_output;
279 }
280
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
282         ExecInput i;
283
284         assert(context);
285
286         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
287
288         switch (i) {
289
290         case EXEC_INPUT_NULL:
291                 return open_null_as(O_RDONLY, STDIN_FILENO);
292
293         case EXEC_INPUT_TTY:
294         case EXEC_INPUT_TTY_FORCE:
295         case EXEC_INPUT_TTY_FAIL: {
296                 int fd, r;
297
298                 if ((fd = acquire_terminal(
299                                      tty_path(context),
300                                      i == EXEC_INPUT_TTY_FAIL,
301                                      i == EXEC_INPUT_TTY_FORCE,
302                                      false,
303                                      (usec_t) -1)) < 0)
304                         return fd;
305
306                 if (fd != STDIN_FILENO) {
307                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308                         close_nointr_nofail(fd);
309                 } else
310                         r = STDIN_FILENO;
311
312                 return r;
313         }
314
315         case EXEC_INPUT_SOCKET:
316                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
317
318         default:
319                 assert_not_reached("Unknown input type");
320         }
321 }
322
323 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
324         ExecOutput o;
325         ExecInput i;
326
327         assert(context);
328         assert(ident);
329
330         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
331         o = fixup_output(context->std_output, socket_fd);
332
333         /* This expects the input is already set up */
334
335         switch (o) {
336
337         case EXEC_OUTPUT_INHERIT:
338
339                 /* If input got downgraded, inherit the original value */
340                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
341                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
342
343                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
344                 if (i != EXEC_INPUT_NULL)
345                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
346
347                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
348                 if (getppid() != 1)
349                         return STDOUT_FILENO;
350
351                 /* We need to open /dev/null here anew, to get the
352                  * right access mode. So we fall through */
353
354         case EXEC_OUTPUT_NULL:
355                 return open_null_as(O_WRONLY, STDOUT_FILENO);
356
357         case EXEC_OUTPUT_TTY:
358                 if (is_terminal_input(i))
359                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
360
361                 /* We don't reset the terminal if this is just about output */
362                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
363
364         case EXEC_OUTPUT_SYSLOG:
365         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
366         case EXEC_OUTPUT_KMSG:
367         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
368         case EXEC_OUTPUT_JOURNAL:
369         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
370                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
371
372         case EXEC_OUTPUT_SOCKET:
373                 assert(socket_fd >= 0);
374                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
375
376         default:
377                 assert_not_reached("Unknown output type");
378         }
379 }
380
381 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
382         ExecOutput o, e;
383         ExecInput i;
384
385         assert(context);
386         assert(ident);
387
388         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
389         o = fixup_output(context->std_output, socket_fd);
390         e = fixup_output(context->std_error, socket_fd);
391
392         /* This expects the input and output are already set up */
393
394         /* Don't change the stderr file descriptor if we inherit all
395          * the way and are not on a tty */
396         if (e == EXEC_OUTPUT_INHERIT &&
397             o == EXEC_OUTPUT_INHERIT &&
398             i == EXEC_INPUT_NULL &&
399             !is_terminal_input(context->std_input) &&
400             getppid () != 1)
401                 return STDERR_FILENO;
402
403         /* Duplicate from stdout if possible */
404         if (e == o || e == EXEC_OUTPUT_INHERIT)
405                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
406
407         switch (e) {
408
409         case EXEC_OUTPUT_NULL:
410                 return open_null_as(O_WRONLY, STDERR_FILENO);
411
412         case EXEC_OUTPUT_TTY:
413                 if (is_terminal_input(i))
414                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
415
416                 /* We don't reset the terminal if this is just about output */
417                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
418
419         case EXEC_OUTPUT_SYSLOG:
420         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421         case EXEC_OUTPUT_KMSG:
422         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423         case EXEC_OUTPUT_JOURNAL:
424         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
426
427         case EXEC_OUTPUT_SOCKET:
428                 assert(socket_fd >= 0);
429                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
430
431         default:
432                 assert_not_reached("Unknown error type");
433         }
434 }
435
436 static int chown_terminal(int fd, uid_t uid) {
437         struct stat st;
438
439         assert(fd >= 0);
440
441         /* This might fail. What matters are the results. */
442         (void) fchown(fd, uid, -1);
443         (void) fchmod(fd, TTY_MODE);
444
445         if (fstat(fd, &st) < 0)
446                 return -errno;
447
448         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
449                 return -EPERM;
450
451         return 0;
452 }
453
454 static int setup_confirm_stdio(int *_saved_stdin,
455                                int *_saved_stdout) {
456         int fd = -1, saved_stdin, saved_stdout = -1, r;
457
458         assert(_saved_stdin);
459         assert(_saved_stdout);
460
461         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
462         if (saved_stdin < 0)
463                 return -errno;
464
465         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
466         if (saved_stdout < 0) {
467                 r = errno;
468                 goto fail;
469         }
470
471         fd = acquire_terminal(
472                         "/dev/console",
473                         false,
474                         false,
475                         false,
476                         DEFAULT_CONFIRM_USEC);
477         if (fd < 0) {
478                 r = fd;
479                 goto fail;
480         }
481
482         r = chown_terminal(fd, getuid());
483         if (r < 0)
484                 goto fail;
485
486         if (dup2(fd, STDIN_FILENO) < 0) {
487                 r = -errno;
488                 goto fail;
489         }
490
491         if (dup2(fd, STDOUT_FILENO) < 0) {
492                 r = -errno;
493                 goto fail;
494         }
495
496         if (fd >= 2)
497                 close_nointr_nofail(fd);
498
499         *_saved_stdin = saved_stdin;
500         *_saved_stdout = saved_stdout;
501
502         return 0;
503
504 fail:
505         if (saved_stdout >= 0)
506                 close_nointr_nofail(saved_stdout);
507
508         if (saved_stdin >= 0)
509                 close_nointr_nofail(saved_stdin);
510
511         if (fd >= 0)
512                 close_nointr_nofail(fd);
513
514         return r;
515 }
516
517 static int write_confirm_message(const char *format, ...) {
518         int fd;
519         va_list ap;
520
521         assert(format);
522
523         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
524         if (fd < 0)
525                 return fd;
526
527         va_start(ap, format);
528         vdprintf(fd, format, ap);
529         va_end(ap);
530
531         close_nointr_nofail(fd);
532
533         return 0;
534 }
535
/* Releases the terminal and puts the saved stdin/stdout descriptors
 * back in place, closing the saved copies afterwards. Descriptors
 * that are negative are skipped. Returns 0 on success, or the negative
 * errno value of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;
        bool have_stdin, have_stdout;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* Restore both descriptors first ... */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... and only then get rid of the saved copies. */
        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return result;
}
562
/* Asks on the console whether the command line given in argv shall be
 * executed. The character chosen by the user (one of "yns") is stored
 * in *response by ask(). Returns the result of ask(), or a negative
 * error value if the console could not be set up or memory ran out.
 *
 * Once setup_confirm_stdio() has succeeded, stdin/stdout are always
 * restored before returning, including on the out-of-memory path. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        /* Undo the console redirection and close the saved fds, no
         * matter how we got here. */
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
582
583 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
584         bool keep_groups = false;
585         int r;
586
587         assert(context);
588
589         /* Lookup and set GID and supplementary group list. Here too
590          * we avoid NSS lookups for gid=0. */
591
592         if (context->group || username) {
593
594                 if (context->group) {
595                         const char *g = context->group;
596
597                         if ((r = get_group_creds(&g, &gid)) < 0)
598                                 return r;
599                 }
600
601                 /* First step, initialize groups from /etc/groups */
602                 if (username && gid != 0) {
603                         if (initgroups(username, gid) < 0)
604                                 return -errno;
605
606                         keep_groups = true;
607                 }
608
609                 /* Second step, set our gids */
610                 if (setresgid(gid, gid, gid) < 0)
611                         return -errno;
612         }
613
614         if (context->supplementary_groups) {
615                 int ngroups_max, k;
616                 gid_t *gids;
617                 char **i;
618
619                 /* Final step, initialize any manually set supplementary groups */
620                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
621
622                 if (!(gids = new(gid_t, ngroups_max)))
623                         return -ENOMEM;
624
625                 if (keep_groups) {
626                         if ((k = getgroups(ngroups_max, gids)) < 0) {
627                                 free(gids);
628                                 return -errno;
629                         }
630                 } else
631                         k = 0;
632
633                 STRV_FOREACH(i, context->supplementary_groups) {
634                         const char *g;
635
636                         if (k >= ngroups_max) {
637                                 free(gids);
638                                 return -E2BIG;
639                         }
640
641                         g = *i;
642                         r = get_group_creds(&g, gids+k);
643                         if (r < 0) {
644                                 free(gids);
645                                 return r;
646                         }
647
648                         k++;
649                 }
650
651                 if (setgroups(k, gids) < 0) {
652                         free(gids);
653                         return -errno;
654                 }
655
656                 free(gids);
657         }
658
659         return 0;
660 }
661
662 static int enforce_user(const ExecContext *context, uid_t uid) {
663         int r;
664         assert(context);
665
666         /* Sets (but doesn't lookup) the uid and make sure we keep the
667          * capabilities while doing so. */
668
669         if (context->capabilities) {
670                 cap_t d;
671                 static const cap_value_t bits[] = {
672                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
673                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
674                 };
675
676                 /* First step: If we need to keep capabilities but
677                  * drop privileges we need to make sure we keep our
678                  * caps, whiel we drop privileges. */
679                 if (uid != 0) {
680                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
681
682                         if (prctl(PR_GET_SECUREBITS) != sb)
683                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
684                                         return -errno;
685                 }
686
687                 /* Second step: set the capabilities. This will reduce
688                  * the capabilities to the minimum we need. */
689
690                 if (!(d = cap_dup(context->capabilities)))
691                         return -errno;
692
693                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
694                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
695                         r = -errno;
696                         cap_free(d);
697                         return r;
698                 }
699
700                 if (cap_set_proc(d) < 0) {
701                         r = -errno;
702                         cap_free(d);
703                         return r;
704                 }
705
706                 cap_free(d);
707         }
708
709         /* Third step: actually set the uids */
710         if (setresuid(uid, uid, uid) < 0)
711                 return -errno;
712
713         /* At this point we should have all necessary capabilities but
714            are otherwise a normal user. However, the caps might got
715            corrupted due to the setresuid() so we need clean them up
716            later. This is done outside of this call. */
717
718         return 0;
719 }
720
721 #ifdef HAVE_PAM
722
723 static int null_conv(
724                 int num_msg,
725                 const struct pam_message **msg,
726                 struct pam_response **resp,
727                 void *appdata_ptr) {
728
729         /* We don't support conversations */
730
731         return PAM_CONV_ERR;
732 }
733
734 static int setup_pam(
735                 const char *name,
736                 const char *user,
737                 uid_t uid,
738                 const char *tty,
739                 char ***pam_env,
740                 int fds[], unsigned n_fds) {
741
742         static const struct pam_conv conv = {
743                 .conv = null_conv,
744                 .appdata_ptr = NULL
745         };
746
747         pam_handle_t *handle = NULL;
748         sigset_t ss, old_ss;
749         int pam_code = PAM_SUCCESS;
750         int err;
751         char **e = NULL;
752         bool close_session = false;
753         pid_t pam_pid = 0, parent_pid;
754
755         assert(name);
756         assert(user);
757         assert(pam_env);
758
759         /* We set up PAM in the parent process, then fork. The child
760          * will then stay around until killed via PR_GET_PDEATHSIG or
761          * systemd via the cgroup logic. It will then remove the PAM
762          * session again. The parent process will exec() the actual
763          * daemon. We do things this way to ensure that the main PID
764          * of the daemon is the one we initially fork()ed. */
765
766         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
767                 handle = NULL;
768                 goto fail;
769         }
770
771         if (tty)
772                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
773                         goto fail;
774
775         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
776                 goto fail;
777
778         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
779                 goto fail;
780
781         close_session = true;
782
783         if ((!(e = pam_getenvlist(handle)))) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         if ((pam_pid = fork()) < 0)
798                 goto fail;
799
800         if (pam_pid == 0) {
801                 int sig;
802                 int r = EXIT_PAM;
803
804                 /* The child's job is to reset the PAM session on
805                  * termination */
806
807                 /* This string must fit in 10 chars (i.e. the length
808                  * of "/sbin/init"), to look pretty in /bin/ps */
809                 rename_process("(sd-pam)");
810
811                 /* Make sure we don't keep open the passed fds in this
812                 child. We assume that otherwise only those fds are
813                 open here that have been opened by PAM. */
814                 close_many(fds, n_fds);
815
816                 /* Drop privileges - we don't need any to pam_close_session
817                  * and this will make PR_SET_PDEATHSIG work in most cases.
818                  * If this fails, ignore the error - but expect sd-pam threads
819                  * to fail to exit normally */
820                 if (setresuid(uid, uid, uid) < 0)
821                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
822
823                 /* Wait until our parent died. This will only work if
824                  * the above setresuid() succeeds, otherwise the kernel
825                  * will not allow unprivileged parents kill their privileged
826                  * children this way. We rely on the control groups kill logic
827                  * to do the rest for us. */
828                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
829                         goto child_finish;
830
831                 /* Check if our parent process might already have
832                  * died? */
833                 if (getppid() == parent_pid) {
834                         for (;;) {
835                                 if (sigwait(&ss, &sig) < 0) {
836                                         if (errno == EINTR)
837                                                 continue;
838
839                                         goto child_finish;
840                                 }
841
842                                 assert(sig == SIGTERM);
843                                 break;
844                         }
845                 }
846
847                 /* If our parent died we'll end the session */
848                 if (getppid() != parent_pid)
849                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
850                                 goto child_finish;
851
852                 r = 0;
853
854         child_finish:
855                 pam_end(handle, pam_code | PAM_DATA_SILENT);
856                 _exit(r);
857         }
858
859         /* If the child was forked off successfully it will do all the
860          * cleanups, so forget about the handle here. */
861         handle = NULL;
862
863         /* Unblock SIGTERM again in the parent */
864         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
865                 goto fail;
866
867         /* We close the log explicitly here, since the PAM modules
868          * might have opened it, but we don't want this fd around. */
869         closelog();
870
871         *pam_env = e;
872         e = NULL;
873
874         return 0;
875
876 fail:
877         if (pam_code != PAM_SUCCESS)
878                 err = -EPERM;  /* PAM errors do not map to errno */
879         else
880                 err = -errno;
881
882         if (handle) {
883                 if (close_session)
884                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
885
886                 pam_end(handle, pam_code | PAM_DATA_SILENT);
887         }
888
889         strv_free(e);
890
891         closelog();
892
893         if (pam_pid > 1) {
894                 kill(pam_pid, SIGTERM);
895                 kill(pam_pid, SIGCONT);
896         }
897
898         return err;
899 }
900 #endif
901
/* Renames the current process to "(NAME)", where NAME is made up of
 * at most the last 8 characters of the file name of path. If there is
 * no file name, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base, *tail;
        size_t len, keep;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Parentheses included the name may be at most 10 chars long
         * (like "/sbin/init") so that it looks nice in /bin/ps. If we
         * have to cut, keep the tail: the beginning is often just a
         * common prefix such as "systemd-". */
        len = strlen(base);
        keep = len > 8 ? 8 : len;
        tail = base + (len - keep);

        process_name[0] = '(';
        memcpy(process_name + 1, tail, keep);
        process_name[keep + 1] = ')';
        process_name[keep + 2] = 0;

        rename_process(process_name);
}
932
933 static int apply_seccomp(uint32_t *syscall_filter) {
934         static const struct sock_filter header[] = {
935                 VALIDATE_ARCHITECTURE,
936                 EXAMINE_SYSCALL
937         };
938         static const struct sock_filter footer[] = {
939                 _KILL_PROCESS
940         };
941
942         int i;
943         unsigned n;
944         struct sock_filter *f;
945         struct sock_fprog prog;
946
947         assert(syscall_filter);
948
949         /* First: count the syscalls to check for */
950         for (i = 0, n = 0; i < syscall_max(); i++)
951                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
952                         n++;
953
954         /* Second: build the filter program from a header the syscall
955          * matches and the footer */
956         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
957         memcpy(f, header, sizeof(header));
958
959         for (i = 0, n = 0; i < syscall_max(); i++)
960                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
961                         struct sock_filter item[] = {
962                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
963                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
964                         };
965
966                         assert_cc(ELEMENTSOF(item) == 2);
967
968                         f[ELEMENTSOF(header) + 2*n]  = item[0];
969                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
970
971                         n++;
972                 }
973
974         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
975
976         /* Third: install the filter */
977         zero(prog);
978         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
979         prog.filter = f;
980         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
981                 return -errno;
982
983         return 0;
984 }
985
986 int exec_spawn(ExecCommand *command,
987                char **argv,
988                const ExecContext *context,
989                int fds[], unsigned n_fds,
990                char **environment,
991                bool apply_permissions,
992                bool apply_chroot,
993                bool apply_tty_stdin,
994                bool confirm_spawn,
995                CGroupBonding *cgroup_bondings,
996                CGroupAttribute *cgroup_attributes,
997                const char *cgroup_suffix,
998                const char *unit_id,
999                int idle_pipe[2],
1000                pid_t *ret) {
1001
1002         pid_t pid;
1003         int r;
1004         char *line;
1005         int socket_fd;
1006         char _cleanup_strv_free_ **files_env = NULL;
1007
1008         assert(command);
1009         assert(context);
1010         assert(ret);
1011         assert(fds || n_fds <= 0);
1012
1013         if (context->std_input == EXEC_INPUT_SOCKET ||
1014             context->std_output == EXEC_OUTPUT_SOCKET ||
1015             context->std_error == EXEC_OUTPUT_SOCKET) {
1016
1017                 if (n_fds != 1)
1018                         return -EINVAL;
1019
1020                 socket_fd = fds[0];
1021
1022                 fds = NULL;
1023                 n_fds = 0;
1024         } else
1025                 socket_fd = -1;
1026
1027         r = exec_context_load_environment(context, &files_env);
1028         if (r < 0) {
1029                 log_struct_unit(LOG_ERR,
1030                            unit_id,
1031                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1032                            "ERRNO=%d", -r,
1033                            NULL);
1034                 return r;
1035         }
1036
1037         if (!argv)
1038                 argv = command->argv;
1039
1040         line = exec_command_line(argv);
1041         if (!line)
1042                 return log_oom();
1043
1044         log_struct_unit(LOG_DEBUG,
1045                    unit_id,
1046                    "MESSAGE=About to execute %s", line,
1047                    NULL);
1048         free(line);
1049
1050         r = cgroup_bonding_realize_list(cgroup_bondings);
1051         if (r < 0)
1052                 return r;
1053
1054         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1055
1056         pid = fork();
1057         if (pid < 0)
1058                 return -errno;
1059
1060         if (pid == 0) {
1061                 int i, err;
1062                 sigset_t ss;
1063                 const char *username = NULL, *home = NULL;
1064                 uid_t uid = (uid_t) -1;
1065                 gid_t gid = (gid_t) -1;
1066                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1067                         **final_env = NULL, **final_argv = NULL;
1068                 unsigned n_env = 0;
1069                 bool set_access = false;
1070
1071                 /* child */
1072
1073                 rename_process_from_path(command->path);
1074
1075                 /* We reset exactly these signals, since they are the
1076                  * only ones we set to SIG_IGN in the main daemon. All
1077                  * others we leave untouched because we set them to
1078                  * SIG_DFL or a valid handler initially, both of which
1079                  * will be demoted to SIG_DFL. */
1080                 default_signals(SIGNALS_CRASH_HANDLER,
1081                                 SIGNALS_IGNORE, -1);
1082
1083                 if (context->ignore_sigpipe)
1084                         ignore_signals(SIGPIPE, -1);
1085
1086                 assert_se(sigemptyset(&ss) == 0);
1087                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1088                         err = -errno;
1089                         r = EXIT_SIGNAL_MASK;
1090                         goto fail_child;
1091                 }
1092
1093                 if (idle_pipe) {
1094                         if (idle_pipe[1] >= 0)
1095                                 close_nointr_nofail(idle_pipe[1]);
1096                         if (idle_pipe[0] >= 0) {
1097                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1098                                 close_nointr_nofail(idle_pipe[0]);
1099                         }
1100                 }
1101
1102                 /* Close sockets very early to make sure we don't
1103                  * block init reexecution because it cannot bind its
1104                  * sockets */
1105                 log_forget_fds();
1106                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1107                                            socket_fd >= 0 ? 1 : n_fds);
1108                 if (err < 0) {
1109                         r = EXIT_FDS;
1110                         goto fail_child;
1111                 }
1112
1113                 if (!context->same_pgrp)
1114                         if (setsid() < 0) {
1115                                 err = -errno;
1116                                 r = EXIT_SETSID;
1117                                 goto fail_child;
1118                         }
1119
1120                 if (context->tcpwrap_name) {
1121                         if (socket_fd >= 0)
1122                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1123                                         err = -EACCES;
1124                                         r = EXIT_TCPWRAP;
1125                                         goto fail_child;
1126                                 }
1127
1128                         for (i = 0; i < (int) n_fds; i++) {
1129                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1130                                         err = -EACCES;
1131                                         r = EXIT_TCPWRAP;
1132                                         goto fail_child;
1133                                 }
1134                         }
1135                 }
1136
1137                 exec_context_tty_reset(context);
1138
1139                 if (confirm_spawn) {
1140                         char response;
1141
1142                         err = ask_for_confirmation(&response, argv);
1143                         if (err == -ETIMEDOUT)
1144                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1145                         else if (err < 0)
1146                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1147                         else if (response == 's') {
1148                                 write_confirm_message("Skipping execution.\n");
1149                                 err = -ECANCELED;
1150                                 r = EXIT_CONFIRM;
1151                                 goto fail_child;
1152                         } else if (response == 'n') {
1153                                 write_confirm_message("Failing execution.\n");
1154                                 err = r = 0;
1155                                 goto fail_child;
1156                         }
1157                 }
1158
1159                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1160                  * must sure to drop O_NONBLOCK */
1161                 if (socket_fd >= 0)
1162                         fd_nonblock(socket_fd, false);
1163
1164                 err = setup_input(context, socket_fd, apply_tty_stdin);
1165                 if (err < 0) {
1166                         r = EXIT_STDIN;
1167                         goto fail_child;
1168                 }
1169
1170                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1171                 if (err < 0) {
1172                         r = EXIT_STDOUT;
1173                         goto fail_child;
1174                 }
1175
1176                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1177                 if (err < 0) {
1178                         r = EXIT_STDERR;
1179                         goto fail_child;
1180                 }
1181
1182                 if (cgroup_bondings) {
1183                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1184                         if (err < 0) {
1185                                 r = EXIT_CGROUP;
1186                                 goto fail_child;
1187                         }
1188                 }
1189
1190                 if (context->oom_score_adjust_set) {
1191                         char t[16];
1192
1193                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1194                         char_array_0(t);
1195
1196                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1197                                 err = -errno;
1198                                 r = EXIT_OOM_ADJUST;
1199                                 goto fail_child;
1200                         }
1201                 }
1202
1203                 if (context->nice_set)
1204                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1205                                 err = -errno;
1206                                 r = EXIT_NICE;
1207                                 goto fail_child;
1208                         }
1209
1210                 if (context->cpu_sched_set) {
1211                         struct sched_param param;
1212
1213                         zero(param);
1214                         param.sched_priority = context->cpu_sched_priority;
1215
1216                         if (sched_setscheduler(0, context->cpu_sched_policy |
1217                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1218                                 err = -errno;
1219                                 r = EXIT_SETSCHEDULER;
1220                                 goto fail_child;
1221                         }
1222                 }
1223
1224                 if (context->cpuset)
1225                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1226                                 err = -errno;
1227                                 r = EXIT_CPUAFFINITY;
1228                                 goto fail_child;
1229                         }
1230
1231                 if (context->ioprio_set)
1232                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1233                                 err = -errno;
1234                                 r = EXIT_IOPRIO;
1235                                 goto fail_child;
1236                         }
1237
1238                 if (context->timer_slack_nsec != (nsec_t) -1)
1239                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1240                                 err = -errno;
1241                                 r = EXIT_TIMERSLACK;
1242                                 goto fail_child;
1243                         }
1244
1245                 if (context->utmp_id)
1246                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1247
1248                 if (context->user) {
1249                         username = context->user;
1250                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1251                         if (err < 0) {
1252                                 r = EXIT_USER;
1253                                 goto fail_child;
1254                         }
1255
1256                         if (is_terminal_input(context->std_input)) {
1257                                 err = chown_terminal(STDIN_FILENO, uid);
1258                                 if (err < 0) {
1259                                         r = EXIT_STDIN;
1260                                         goto fail_child;
1261                                 }
1262                         }
1263
1264                         if (cgroup_bondings && context->control_group_modify) {
1265                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1266                                 if (err >= 0)
1267                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1268                                 if (err < 0) {
1269                                         r = EXIT_CGROUP;
1270                                         goto fail_child;
1271                                 }
1272
1273                                 set_access = true;
1274                         }
1275                 }
1276
1277                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1278                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1279                         if (err < 0) {
1280                                 r = EXIT_CGROUP;
1281                                 goto fail_child;
1282                         }
1283                 }
1284
1285                 if (apply_permissions) {
1286                         err = enforce_groups(context, username, gid);
1287                         if (err < 0) {
1288                                 r = EXIT_GROUP;
1289                                 goto fail_child;
1290                         }
1291                 }
1292
1293                 umask(context->umask);
1294
1295 #ifdef HAVE_PAM
1296                 if (apply_permissions && context->pam_name && username) {
1297                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1298                         if (err < 0) {
1299                                 r = EXIT_PAM;
1300                                 goto fail_child;
1301                         }
1302                 }
1303 #endif
1304                 if (context->private_network) {
1305                         if (unshare(CLONE_NEWNET) < 0) {
1306                                 err = -errno;
1307                                 r = EXIT_NETWORK;
1308                                 goto fail_child;
1309                         }
1310
1311                         loopback_setup();
1312                 }
1313
1314                 if (strv_length(context->read_write_dirs) > 0 ||
1315                     strv_length(context->read_only_dirs) > 0 ||
1316                     strv_length(context->inaccessible_dirs) > 0 ||
1317                     context->mount_flags != 0 ||
1318                     context->private_tmp) {
1319                         err = setup_namespace(context->read_write_dirs,
1320                                               context->read_only_dirs,
1321                                               context->inaccessible_dirs,
1322                                               context->private_tmp,
1323                                               context->mount_flags);
1324                         if (err < 0) {
1325                                 r = EXIT_NAMESPACE;
1326                                 goto fail_child;
1327                         }
1328                 }
1329
1330                 if (apply_chroot) {
1331                         if (context->root_directory)
1332                                 if (chroot(context->root_directory) < 0) {
1333                                         err = -errno;
1334                                         r = EXIT_CHROOT;
1335                                         goto fail_child;
1336                                 }
1337
1338                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1339                                 err = -errno;
1340                                 r = EXIT_CHDIR;
1341                                 goto fail_child;
1342                         }
1343                 } else {
1344                         char _cleanup_free_ *d = NULL;
1345
1346                         if (asprintf(&d, "%s/%s",
1347                                      context->root_directory ? context->root_directory : "",
1348                                      context->working_directory ? context->working_directory : "") < 0) {
1349                                 err = -ENOMEM;
1350                                 r = EXIT_MEMORY;
1351                                 goto fail_child;
1352                         }
1353
1354                         if (chdir(d) < 0) {
1355                                 err = -errno;
1356                                 r = EXIT_CHDIR;
1357                                 goto fail_child;
1358                         }
1359                 }
1360
1361                 /* We repeat the fd closing here, to make sure that
1362                  * nothing is leaked from the PAM modules */
1363                 err = close_all_fds(fds, n_fds);
1364                 if (err >= 0)
1365                         err = shift_fds(fds, n_fds);
1366                 if (err >= 0)
1367                         err = flags_fds(fds, n_fds, context->non_blocking);
1368                 if (err < 0) {
1369                         r = EXIT_FDS;
1370                         goto fail_child;
1371                 }
1372
1373                 if (apply_permissions) {
1374
1375                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1376                                 if (!context->rlimit[i])
1377                                         continue;
1378
1379                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1380                                         err = -errno;
1381                                         r = EXIT_LIMITS;
1382                                         goto fail_child;
1383                                 }
1384                         }
1385
1386                         if (context->capability_bounding_set_drop) {
1387                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1388                                 if (err < 0) {
1389                                         r = EXIT_CAPABILITIES;
1390                                         goto fail_child;
1391                                 }
1392                         }
1393
1394                         if (context->user) {
1395                                 err = enforce_user(context, uid);
1396                                 if (err < 0) {
1397                                         r = EXIT_USER;
1398                                         goto fail_child;
1399                                 }
1400                         }
1401
1402                         /* PR_GET_SECUREBITS is not privileged, while
1403                          * PR_SET_SECUREBITS is. So to suppress
1404                          * potential EPERMs we'll try not to call
1405                          * PR_SET_SECUREBITS unless necessary. */
1406                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1407                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1408                                         err = -errno;
1409                                         r = EXIT_SECUREBITS;
1410                                         goto fail_child;
1411                                 }
1412
1413                         if (context->capabilities)
1414                                 if (cap_set_proc(context->capabilities) < 0) {
1415                                         err = -errno;
1416                                         r = EXIT_CAPABILITIES;
1417                                         goto fail_child;
1418                                 }
1419
1420                         if (context->no_new_privileges)
1421                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1422                                         err = -errno;
1423                                         r = EXIT_NO_NEW_PRIVILEGES;
1424                                         goto fail_child;
1425                                 }
1426
1427                         if (context->syscall_filter) {
1428                                 err = apply_seccomp(context->syscall_filter);
1429                                 if (err < 0) {
1430                                         r = EXIT_SECCOMP;
1431                                         goto fail_child;
1432                                 }
1433                         }
1434                 }
1435
1436                 if (!(our_env = new0(char*, 7))) {
1437                         err = -ENOMEM;
1438                         r = EXIT_MEMORY;
1439                         goto fail_child;
1440                 }
1441
1442                 if (n_fds > 0)
1443                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1444                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1445                                 err = -ENOMEM;
1446                                 r = EXIT_MEMORY;
1447                                 goto fail_child;
1448                         }
1449
1450                 if (home)
1451                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1452                                 err = -ENOMEM;
1453                                 r = EXIT_MEMORY;
1454                                 goto fail_child;
1455                         }
1456
1457                 if (username)
1458                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1459                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1460                                 err = -ENOMEM;
1461                                 r = EXIT_MEMORY;
1462                                 goto fail_child;
1463                         }
1464
1465                 if (is_terminal_input(context->std_input) ||
1466                     context->std_output == EXEC_OUTPUT_TTY ||
1467                     context->std_error == EXEC_OUTPUT_TTY)
1468                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1469                                 err = -ENOMEM;
1470                                 r = EXIT_MEMORY;
1471                                 goto fail_child;
1472                         }
1473
1474                 assert(n_env <= 7);
1475
1476                 if (!(final_env = strv_env_merge(
1477                                       5,
1478                                       environment,
1479                                       our_env,
1480                                       context->environment,
1481                                       files_env,
1482                                       pam_env,
1483                                       NULL))) {
1484                         err = -ENOMEM;
1485                         r = EXIT_MEMORY;
1486                         goto fail_child;
1487                 }
1488
1489                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1490                         err = -ENOMEM;
1491                         r = EXIT_MEMORY;
1492                         goto fail_child;
1493                 }
1494
1495                 final_env = strv_env_clean(final_env);
1496
1497                 execve(command->path, final_argv, final_env);
1498                 err = -errno;
1499                 r = EXIT_EXEC;
1500
1501         fail_child:
1502                 if (r != 0) {
1503                         log_open();
1504                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1505                                    "EXECUTABLE=%s", command->path,
1506                                    "MESSAGE=Failed at step %s spawning %s: %s",
1507                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1508                                           command->path, strerror(-err),
1509                                    "ERRNO=%d", -err,
1510                                    NULL);
1511                         log_close();
1512                 }
1513
1514                 _exit(r);
1515         }
1516
1517         log_struct_unit(LOG_DEBUG,
1518                    unit_id,
1519                    "MESSAGE=Forked %s as %lu",
1520                           command->path, (unsigned long) pid,
1521                    NULL);
1522
1523         /* We add the new process to the cgroup both in the child (so
1524          * that we can be sure that no user code is ever executed
1525          * outside of the cgroup) and in the parent (so that we can be
1526          * sure that when we kill the cgroup the process will be
1527          * killed too). */
1528         if (cgroup_bondings)
1529                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1530
1531         exec_status_start(&command->exec_status, pid);
1532
1533         *ret = pid;
1534         return 0;
1535 }
1536
1537 void exec_context_init(ExecContext *c) {
1538         assert(c);
1539
1540         c->umask = 0022;
1541         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1542         c->cpu_sched_policy = SCHED_OTHER;
1543         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1544         c->syslog_level_prefix = true;
1545         c->control_group_persistent = -1;
1546         c->ignore_sigpipe = true;
1547         c->timer_slack_nsec = (nsec_t) -1;
1548 }
1549
1550 void exec_context_done(ExecContext *c) {
1551         unsigned l;
1552
1553         assert(c);
1554
1555         strv_free(c->environment);
1556         c->environment = NULL;
1557
1558         strv_free(c->environment_files);
1559         c->environment_files = NULL;
1560
1561         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1562                 free(c->rlimit[l]);
1563                 c->rlimit[l] = NULL;
1564         }
1565
1566         free(c->working_directory);
1567         c->working_directory = NULL;
1568         free(c->root_directory);
1569         c->root_directory = NULL;
1570
1571         free(c->tty_path);
1572         c->tty_path = NULL;
1573
1574         free(c->tcpwrap_name);
1575         c->tcpwrap_name = NULL;
1576
1577         free(c->syslog_identifier);
1578         c->syslog_identifier = NULL;
1579
1580         free(c->user);
1581         c->user = NULL;
1582
1583         free(c->group);
1584         c->group = NULL;
1585
1586         strv_free(c->supplementary_groups);
1587         c->supplementary_groups = NULL;
1588
1589         free(c->pam_name);
1590         c->pam_name = NULL;
1591
1592         if (c->capabilities) {
1593                 cap_free(c->capabilities);
1594                 c->capabilities = NULL;
1595         }
1596
1597         strv_free(c->read_only_dirs);
1598         c->read_only_dirs = NULL;
1599
1600         strv_free(c->read_write_dirs);
1601         c->read_write_dirs = NULL;
1602
1603         strv_free(c->inaccessible_dirs);
1604         c->inaccessible_dirs = NULL;
1605
1606         if (c->cpuset)
1607                 CPU_FREE(c->cpuset);
1608
1609         free(c->utmp_id);
1610         c->utmp_id = NULL;
1611
1612         free(c->syscall_filter);
1613         c->syscall_filter = NULL;
1614 }
1615
1616 void exec_command_done(ExecCommand *c) {
1617         assert(c);
1618
1619         free(c->path);
1620         c->path = NULL;
1621
1622         strv_free(c->argv);
1623         c->argv = NULL;
1624 }
1625
1626 void exec_command_done_array(ExecCommand *c, unsigned n) {
1627         unsigned i;
1628
1629         for (i = 0; i < n; i++)
1630                 exec_command_done(c+i);
1631 }
1632
1633 void exec_command_free_list(ExecCommand *c) {
1634         ExecCommand *i;
1635
1636         while ((i = c)) {
1637                 LIST_REMOVE(ExecCommand, command, c, i);
1638                 exec_command_done(i);
1639                 free(i);
1640         }
1641 }
1642
1643 void exec_command_free_array(ExecCommand **c, unsigned n) {
1644         unsigned i;
1645
1646         for (i = 0; i < n; i++) {
1647                 exec_command_free_list(c[i]);
1648                 c[i] = NULL;
1649         }
1650 }
1651
1652 int exec_context_load_environment(const ExecContext *c, char ***l) {
1653         char **i, **r = NULL;
1654
1655         assert(c);
1656         assert(l);
1657
1658         STRV_FOREACH(i, c->environment_files) {
1659                 char *fn;
1660                 int k;
1661                 bool ignore = false;
1662                 char **p;
1663                 glob_t pglob;
1664                 int count, n;
1665
1666                 fn = *i;
1667
1668                 if (fn[0] == '-') {
1669                         ignore = true;
1670                         fn ++;
1671                 }
1672
1673                 if (!path_is_absolute(fn)) {
1674
1675                         if (ignore)
1676                                 continue;
1677
1678                         strv_free(r);
1679                         return -EINVAL;
1680                 }
1681
1682                 /* Filename supports globbing, take all matching files */
1683                 zero(pglob);
1684                 errno = 0;
1685                 if (glob(fn, 0, NULL, &pglob) != 0) {
1686                         globfree(&pglob);
1687                         if (ignore)
1688                                 continue;
1689
1690                         strv_free(r);
1691                         return errno ? -errno : -EINVAL;
1692                 }
1693                 count = pglob.gl_pathc;
1694                 if (count == 0) {
1695                         globfree(&pglob);
1696                         if (ignore)
1697                                 continue;
1698
1699                         strv_free(r);
1700                         return -EINVAL;
1701                 }
1702                 for (n = 0; n < count; n++) {
1703                         k = load_env_file(pglob.gl_pathv[n], &p);
1704                         if (k < 0) {
1705                                 if (ignore)
1706                                         continue;
1707
1708                                 strv_free(r);
1709                                 globfree(&pglob);
1710                                 return k;
1711                          }
1712
1713                         if (r == NULL)
1714                                 r = p;
1715                         else {
1716                                 char **m;
1717
1718                                 m = strv_env_merge(2, r, p);
1719                                 strv_free(r);
1720                                 strv_free(p);
1721
1722                                 if (!m) {
1723                                         globfree(&pglob);
1724                                         return -ENOMEM;
1725                                 }
1726
1727                                 r = m;
1728                         }
1729                 }
1730                 globfree(&pglob);
1731         }
1732
1733         *l = r;
1734
1735         return 0;
1736 }
1737
1738 static void strv_fprintf(FILE *f, char **l) {
1739         char **g;
1740
1741         assert(f);
1742
1743         STRV_FOREACH(g, l)
1744                 fprintf(f, " %s", *g);
1745 }
1746
1747 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1748         char ** e;
1749         unsigned i;
1750
1751         assert(c);
1752         assert(f);
1753
1754         if (!prefix)
1755                 prefix = "";
1756
1757         fprintf(f,
1758                 "%sUMask: %04o\n"
1759                 "%sWorkingDirectory: %s\n"
1760                 "%sRootDirectory: %s\n"
1761                 "%sNonBlocking: %s\n"
1762                 "%sPrivateTmp: %s\n"
1763                 "%sControlGroupModify: %s\n"
1764                 "%sControlGroupPersistent: %s\n"
1765                 "%sPrivateNetwork: %s\n"
1766                 "%sIgnoreSIGPIPE: %s\n",
1767                 prefix, c->umask,
1768                 prefix, c->working_directory ? c->working_directory : "/",
1769                 prefix, c->root_directory ? c->root_directory : "/",
1770                 prefix, yes_no(c->non_blocking),
1771                 prefix, yes_no(c->private_tmp),
1772                 prefix, yes_no(c->control_group_modify),
1773                 prefix, yes_no(c->control_group_persistent),
1774                 prefix, yes_no(c->private_network),
1775                 prefix, yes_no(c->ignore_sigpipe));
1776
1777         STRV_FOREACH(e, c->environment)
1778                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1779
1780         STRV_FOREACH(e, c->environment_files)
1781                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1782
1783         if (c->tcpwrap_name)
1784                 fprintf(f,
1785                         "%sTCPWrapName: %s\n",
1786                         prefix, c->tcpwrap_name);
1787
1788         if (c->nice_set)
1789                 fprintf(f,
1790                         "%sNice: %i\n",
1791                         prefix, c->nice);
1792
1793         if (c->oom_score_adjust_set)
1794                 fprintf(f,
1795                         "%sOOMScoreAdjust: %i\n",
1796                         prefix, c->oom_score_adjust);
1797
1798         for (i = 0; i < RLIM_NLIMITS; i++)
1799                 if (c->rlimit[i])
1800                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1801
1802         if (c->ioprio_set) {
1803                 char *class_str;
1804                 int r;
1805
1806                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1807                 if (r < 0)
1808                         class_str = NULL;
1809                 fprintf(f,
1810                         "%sIOSchedulingClass: %s\n"
1811                         "%sIOPriority: %i\n",
1812                         prefix, strna(class_str),
1813                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1814                 free(class_str);
1815         }
1816
1817         if (c->cpu_sched_set) {
1818                 char *policy_str;
1819                 int r;
1820
1821                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1822                 if (r < 0)
1823                         policy_str = NULL;
1824                 fprintf(f,
1825                         "%sCPUSchedulingPolicy: %s\n"
1826                         "%sCPUSchedulingPriority: %i\n"
1827                         "%sCPUSchedulingResetOnFork: %s\n",
1828                         prefix, strna(policy_str),
1829                         prefix, c->cpu_sched_priority,
1830                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1831                 free(policy_str);
1832         }
1833
1834         if (c->cpuset) {
1835                 fprintf(f, "%sCPUAffinity:", prefix);
1836                 for (i = 0; i < c->cpuset_ncpus; i++)
1837                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1838                                 fprintf(f, " %i", i);
1839                 fputs("\n", f);
1840         }
1841
1842         if (c->timer_slack_nsec != (nsec_t) -1)
1843                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1844
1845         fprintf(f,
1846                 "%sStandardInput: %s\n"
1847                 "%sStandardOutput: %s\n"
1848                 "%sStandardError: %s\n",
1849                 prefix, exec_input_to_string(c->std_input),
1850                 prefix, exec_output_to_string(c->std_output),
1851                 prefix, exec_output_to_string(c->std_error));
1852
1853         if (c->tty_path)
1854                 fprintf(f,
1855                         "%sTTYPath: %s\n"
1856                         "%sTTYReset: %s\n"
1857                         "%sTTYVHangup: %s\n"
1858                         "%sTTYVTDisallocate: %s\n",
1859                         prefix, c->tty_path,
1860                         prefix, yes_no(c->tty_reset),
1861                         prefix, yes_no(c->tty_vhangup),
1862                         prefix, yes_no(c->tty_vt_disallocate));
1863
1864         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1865             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1866             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1867             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1868                 char *fac_str, *lvl_str;
1869                 int r;
1870
1871                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1872                 if (r < 0)
1873                         fac_str = NULL;
1874
1875                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1876                 if (r < 0)
1877                         lvl_str = NULL;
1878
1879                 fprintf(f,
1880                         "%sSyslogFacility: %s\n"
1881                         "%sSyslogLevel: %s\n",
1882                         prefix, strna(fac_str),
1883                         prefix, strna(lvl_str));
1884                 free(lvl_str);
1885                 free(fac_str);
1886         }
1887
1888         if (c->capabilities) {
1889                 char *t;
1890                 if ((t = cap_to_text(c->capabilities, NULL))) {
1891                         fprintf(f, "%sCapabilities: %s\n",
1892                                 prefix, t);
1893                         cap_free(t);
1894                 }
1895         }
1896
1897         if (c->secure_bits)
1898                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1899                         prefix,
1900                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1901                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1902                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1903                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1904                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1905                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1906
1907         if (c->capability_bounding_set_drop) {
1908                 unsigned long l;
1909                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1910
1911                 for (l = 0; l <= cap_last_cap(); l++)
1912                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1913                                 char *t;
1914
1915                                 if ((t = cap_to_name(l))) {
1916                                         fprintf(f, " %s", t);
1917                                         cap_free(t);
1918                                 }
1919                         }
1920
1921                 fputs("\n", f);
1922         }
1923
1924         if (c->user)
1925                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1926         if (c->group)
1927                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1928
1929         if (strv_length(c->supplementary_groups) > 0) {
1930                 fprintf(f, "%sSupplementaryGroups:", prefix);
1931                 strv_fprintf(f, c->supplementary_groups);
1932                 fputs("\n", f);
1933         }
1934
1935         if (c->pam_name)
1936                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1937
1938         if (strv_length(c->read_write_dirs) > 0) {
1939                 fprintf(f, "%sReadWriteDirs:", prefix);
1940                 strv_fprintf(f, c->read_write_dirs);
1941                 fputs("\n", f);
1942         }
1943
1944         if (strv_length(c->read_only_dirs) > 0) {
1945                 fprintf(f, "%sReadOnlyDirs:", prefix);
1946                 strv_fprintf(f, c->read_only_dirs);
1947                 fputs("\n", f);
1948         }
1949
1950         if (strv_length(c->inaccessible_dirs) > 0) {
1951                 fprintf(f, "%sInaccessibleDirs:", prefix);
1952                 strv_fprintf(f, c->inaccessible_dirs);
1953                 fputs("\n", f);
1954         }
1955
1956         if (c->utmp_id)
1957                 fprintf(f,
1958                         "%sUtmpIdentifier: %s\n",
1959                         prefix, c->utmp_id);
1960 }
1961
1962 void exec_status_start(ExecStatus *s, pid_t pid) {
1963         assert(s);
1964
1965         zero(*s);
1966         s->pid = pid;
1967         dual_timestamp_get(&s->start_timestamp);
1968 }
1969
1970 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1971         assert(s);
1972
1973         if (s->pid && s->pid != pid)
1974                 zero(*s);
1975
1976         s->pid = pid;
1977         dual_timestamp_get(&s->exit_timestamp);
1978
1979         s->code = code;
1980         s->status = status;
1981
1982         if (context) {
1983                 if (context->utmp_id)
1984                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1985
1986                 exec_context_tty_reset(context);
1987         }
1988 }
1989
1990 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1991         char buf[FORMAT_TIMESTAMP_MAX];
1992
1993         assert(s);
1994         assert(f);
1995
1996         if (!prefix)
1997                 prefix = "";
1998
1999         if (s->pid <= 0)
2000                 return;
2001
2002         fprintf(f,
2003                 "%sPID: %lu\n",
2004                 prefix, (unsigned long) s->pid);
2005
2006         if (s->start_timestamp.realtime > 0)
2007                 fprintf(f,
2008                         "%sStart Timestamp: %s\n",
2009                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2010
2011         if (s->exit_timestamp.realtime > 0)
2012                 fprintf(f,
2013                         "%sExit Timestamp: %s\n"
2014                         "%sExit Code: %s\n"
2015                         "%sExit Status: %i\n",
2016                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2017                         prefix, sigchld_code_to_string(s->code),
2018                         prefix, s->status);
2019 }
2020
2021 char *exec_command_line(char **argv) {
2022         size_t k;
2023         char *n, *p, **a;
2024         bool first = true;
2025
2026         assert(argv);
2027
2028         k = 1;
2029         STRV_FOREACH(a, argv)
2030                 k += strlen(*a)+3;
2031
2032         if (!(n = new(char, k)))
2033                 return NULL;
2034
2035         p = n;
2036         STRV_FOREACH(a, argv) {
2037
2038                 if (!first)
2039                         *(p++) = ' ';
2040                 else
2041                         first = false;
2042
2043                 if (strpbrk(*a, WHITESPACE)) {
2044                         *(p++) = '\'';
2045                         p = stpcpy(p, *a);
2046                         *(p++) = '\'';
2047                 } else
2048                         p = stpcpy(p, *a);
2049
2050         }
2051
2052         *p = 0;
2053
2054         /* FIXME: this doesn't really handle arguments that have
2055          * spaces and ticks in them */
2056
2057         return n;
2058 }
2059
2060 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2061         char *p2;
2062         const char *prefix2;
2063
2064         char *cmd;
2065
2066         assert(c);
2067         assert(f);
2068
2069         if (!prefix)
2070                 prefix = "";
2071         p2 = strappend(prefix, "\t");
2072         prefix2 = p2 ? p2 : prefix;
2073
2074         cmd = exec_command_line(c->argv);
2075
2076         fprintf(f,
2077                 "%sCommand Line: %s\n",
2078                 prefix, cmd ? cmd : strerror(ENOMEM));
2079
2080         free(cmd);
2081
2082         exec_status_dump(&c->exec_status, f, prefix2);
2083
2084         free(p2);
2085 }
2086
2087 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2088         assert(f);
2089
2090         if (!prefix)
2091                 prefix = "";
2092
2093         LIST_FOREACH(command, c, c)
2094                 exec_command_dump(c, f, prefix);
2095 }
2096
2097 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2098         ExecCommand *end;
2099
2100         assert(l);
2101         assert(e);
2102
2103         if (*l) {
2104                 /* It's kind of important, that we keep the order here */
2105                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2106                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2107         } else
2108               *l = e;
2109 }
2110
2111 int exec_command_set(ExecCommand *c, const char *path, ...) {
2112         va_list ap;
2113         char **l, *p;
2114
2115         assert(c);
2116         assert(path);
2117
2118         va_start(ap, path);
2119         l = strv_new_ap(path, ap);
2120         va_end(ap);
2121
2122         if (!l)
2123                 return -ENOMEM;
2124
2125         if (!(p = strdup(path))) {
2126                 strv_free(l);
2127                 return -ENOMEM;
2128         }
2129
2130         free(c->path);
2131         c->path = p;
2132
2133         strv_free(c->argv);
2134         c->argv = l;
2135
2136         return 0;
2137 }
2138
2139 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2140         [EXEC_INPUT_NULL] = "null",
2141         [EXEC_INPUT_TTY] = "tty",
2142         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2143         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2144         [EXEC_INPUT_SOCKET] = "socket"
2145 };
2146
2147 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2148
2149 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2150         [EXEC_OUTPUT_INHERIT] = "inherit",
2151         [EXEC_OUTPUT_NULL] = "null",
2152         [EXEC_OUTPUT_TTY] = "tty",
2153         [EXEC_OUTPUT_SYSLOG] = "syslog",
2154         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2155         [EXEC_OUTPUT_KMSG] = "kmsg",
2156         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2157         [EXEC_OUTPUT_JOURNAL] = "journal",
2158         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2159         [EXEC_OUTPUT_SOCKET] = "socket"
2160 };
2161
2162 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);