chiark / gitweb /
Added globbing support to EnvironmentFile
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67
68 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
69
70 /* This assumes there is a 'tty' group */
71 #define TTY_MODE 0620
72
/* Moves the passed file descriptors so that they occupy the consecutive
 * range starting at 3: on success fds[i] == i+3 for every index.
 *
 * The array is rewritten in place. Each misplaced descriptor is
 * duplicated with F_DUPFD to the lowest free number >= its target and
 * the old one is closed. If the exact target was still occupied at that
 * moment, the pass is repeated starting from the first such index.
 *
 * Returns 0 on success or a negative errno-style value if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx, retry = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int want = idx + 3;
                        int moved;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, want);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = moved;

                        /* The wanted slot was taken, so we landed
                         * somewhere above it. Remember the earliest
                         * such index for the next pass. */
                        if (moved != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (first >= 0);

        return 0;
}
116
/* Prepares the passed descriptors for being handed to a child: applies
 * the requested O_NONBLOCK state via fd_nonblock() and always clears
 * FD_CLOEXEC via fd_cloexec(), since the descriptors are meant to
 * survive exec().
 *
 * Returns 0 on success, or the first negative value reported by one of
 * the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
143
144 static const char *tty_path(const ExecContext *context) {
145         assert(context);
146
147         if (context->tty_path)
148                 return context->tty_path;
149
150         return "/dev/console";
151 }
152
153 void exec_context_tty_reset(const ExecContext *context) {
154         assert(context);
155
156         if (context->tty_vhangup)
157                 terminal_vhangup(tty_path(context));
158
159         if (context->tty_reset)
160                 reset_terminal(tty_path(context));
161
162         if (context->tty_vt_disallocate && context->tty_path)
163                 vt_disallocate(context->tty_path);
164 }
165
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes
 * it available as descriptor number nfd.
 *
 * Returns nfd on success, a negative errno-style value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already got the number we wanted? */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return r;
}
182
183 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
184         int fd, r;
185         union sockaddr_union sa;
186
187         assert(context);
188         assert(output < _EXEC_OUTPUT_MAX);
189         assert(ident);
190         assert(nfd >= 0);
191
192         fd = socket(AF_UNIX, SOCK_STREAM, 0);
193         if (fd < 0)
194                 return -errno;
195
196         zero(sa);
197         sa.un.sun_family = AF_UNIX;
198         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
199
200         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
201         if (r < 0) {
202                 close_nointr_nofail(fd);
203                 return -errno;
204         }
205
206         if (shutdown(fd, SHUT_RD) < 0) {
207                 close_nointr_nofail(fd);
208                 return -errno;
209         }
210
211         dprintf(fd,
212                 "%s\n"
213                 "%s\n"
214                 "%i\n"
215                 "%i\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n",
219                 context->syslog_identifier ? context->syslog_identifier : ident,
220                 unit_id,
221                 context->syslog_priority,
222                 !!context->syslog_level_prefix,
223                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
224                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
225                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
226
227         if (fd != nfd) {
228                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
229                 close_nointr_nofail(fd);
230         } else
231                 r = nfd;
232
233         return r;
234 }
/* Opens the terminal at path via open_terminal() (with O_NOCTTY added to
 * the passed flags) and makes it available as descriptor number nfd.
 *
 * Returns nfd on success. On failure returns whatever negative value
 * open_terminal() reported, or a negative errno-style value if dup2()
 * failed. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already got the number we wanted? */
        if (term_fd == nfd)
                return nfd;

        r = dup2(term_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(term_fd);

        return r;
}
252
253 static bool is_terminal_input(ExecInput i) {
254         return
255                 i == EXEC_INPUT_TTY ||
256                 i == EXEC_INPUT_TTY_FORCE ||
257                 i == EXEC_INPUT_TTY_FAIL;
258 }
259
260 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
261
262         if (is_terminal_input(std_input) && !apply_tty_stdin)
263                 return EXEC_INPUT_NULL;
264
265         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
266                 return EXEC_INPUT_NULL;
267
268         return std_input;
269 }
270
271 static int fixup_output(ExecOutput std_output, int socket_fd) {
272
273         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
274                 return EXEC_OUTPUT_INHERIT;
275
276         return std_output;
277 }
278
279 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
280         ExecInput i;
281
282         assert(context);
283
284         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
285
286         switch (i) {
287
288         case EXEC_INPUT_NULL:
289                 return open_null_as(O_RDONLY, STDIN_FILENO);
290
291         case EXEC_INPUT_TTY:
292         case EXEC_INPUT_TTY_FORCE:
293         case EXEC_INPUT_TTY_FAIL: {
294                 int fd, r;
295
296                 if ((fd = acquire_terminal(
297                                      tty_path(context),
298                                      i == EXEC_INPUT_TTY_FAIL,
299                                      i == EXEC_INPUT_TTY_FORCE,
300                                      false,
301                                      (usec_t) -1)) < 0)
302                         return fd;
303
304                 if (fd != STDIN_FILENO) {
305                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306                         close_nointr_nofail(fd);
307                 } else
308                         r = STDIN_FILENO;
309
310                 return r;
311         }
312
313         case EXEC_INPUT_SOCKET:
314                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
315
316         default:
317                 assert_not_reached("Unknown input type");
318         }
319 }
320
321 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
322         ExecOutput o;
323         ExecInput i;
324
325         assert(context);
326         assert(ident);
327
328         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
329         o = fixup_output(context->std_output, socket_fd);
330
331         /* This expects the input is already set up */
332
333         switch (o) {
334
335         case EXEC_OUTPUT_INHERIT:
336
337                 /* If input got downgraded, inherit the original value */
338                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
339                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
340
341                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
342                 if (i != EXEC_INPUT_NULL)
343                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
344
345                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
346                 if (getppid() != 1)
347                         return STDOUT_FILENO;
348
349                 /* We need to open /dev/null here anew, to get the
350                  * right access mode. So we fall through */
351
352         case EXEC_OUTPUT_NULL:
353                 return open_null_as(O_WRONLY, STDOUT_FILENO);
354
355         case EXEC_OUTPUT_TTY:
356                 if (is_terminal_input(i))
357                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
358
359                 /* We don't reset the terminal if this is just about output */
360                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
361
362         case EXEC_OUTPUT_SYSLOG:
363         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
364         case EXEC_OUTPUT_KMSG:
365         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
366         case EXEC_OUTPUT_JOURNAL:
367         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
368                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
369
370         case EXEC_OUTPUT_SOCKET:
371                 assert(socket_fd >= 0);
372                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
373
374         default:
375                 assert_not_reached("Unknown output type");
376         }
377 }
378
379 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
380         ExecOutput o, e;
381         ExecInput i;
382
383         assert(context);
384         assert(ident);
385
386         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
387         o = fixup_output(context->std_output, socket_fd);
388         e = fixup_output(context->std_error, socket_fd);
389
390         /* This expects the input and output are already set up */
391
392         /* Don't change the stderr file descriptor if we inherit all
393          * the way and are not on a tty */
394         if (e == EXEC_OUTPUT_INHERIT &&
395             o == EXEC_OUTPUT_INHERIT &&
396             i == EXEC_INPUT_NULL &&
397             !is_terminal_input(context->std_input) &&
398             getppid () != 1)
399                 return STDERR_FILENO;
400
401         /* Duplicate from stdout if possible */
402         if (e == o || e == EXEC_OUTPUT_INHERIT)
403                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
404
405         switch (e) {
406
407         case EXEC_OUTPUT_NULL:
408                 return open_null_as(O_WRONLY, STDERR_FILENO);
409
410         case EXEC_OUTPUT_TTY:
411                 if (is_terminal_input(i))
412                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
413
414                 /* We don't reset the terminal if this is just about output */
415                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
416
417         case EXEC_OUTPUT_SYSLOG:
418         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
419         case EXEC_OUTPUT_KMSG:
420         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
421         case EXEC_OUTPUT_JOURNAL:
422         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
423                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
424
425         case EXEC_OUTPUT_SOCKET:
426                 assert(socket_fd >= 0);
427                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
428
429         default:
430                 assert_not_reached("Unknown error type");
431         }
432 }
433
434 static int chown_terminal(int fd, uid_t uid) {
435         struct stat st;
436
437         assert(fd >= 0);
438
439         /* This might fail. What matters are the results. */
440         (void) fchown(fd, uid, -1);
441         (void) fchmod(fd, TTY_MODE);
442
443         if (fstat(fd, &st) < 0)
444                 return -errno;
445
446         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
447                 return -EPERM;
448
449         return 0;
450 }
451
452 static int setup_confirm_stdio(int *_saved_stdin,
453                                int *_saved_stdout) {
454         int fd = -1, saved_stdin, saved_stdout = -1, r;
455
456         assert(_saved_stdin);
457         assert(_saved_stdout);
458
459         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460         if (saved_stdin < 0)
461                 return -errno;
462
463         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
464         if (saved_stdout < 0) {
465                 r = errno;
466                 goto fail;
467         }
468
469         fd = acquire_terminal(
470                         "/dev/console",
471                         false,
472                         false,
473                         false,
474                         DEFAULT_CONFIRM_USEC);
475         if (fd < 0) {
476                 r = fd;
477                 goto fail;
478         }
479
480         r = chown_terminal(fd, getuid());
481         if (r < 0)
482                 goto fail;
483
484         if (dup2(fd, STDIN_FILENO) < 0) {
485                 r = -errno;
486                 goto fail;
487         }
488
489         if (dup2(fd, STDOUT_FILENO) < 0) {
490                 r = -errno;
491                 goto fail;
492         }
493
494         if (fd >= 2)
495                 close_nointr_nofail(fd);
496
497         *_saved_stdin = saved_stdin;
498         *_saved_stdout = saved_stdout;
499
500         return 0;
501
502 fail:
503         if (saved_stdout >= 0)
504                 close_nointr_nofail(saved_stdout);
505
506         if (saved_stdin >= 0)
507                 close_nointr_nofail(saved_stdin);
508
509         if (fd >= 0)
510                 close_nointr_nofail(fd);
511
512         return r;
513 }
514
515 static int write_confirm_message(const char *format, ...) {
516         int fd;
517         va_list ap;
518
519         assert(format);
520
521         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522         if (fd < 0)
523                 return fd;
524
525         va_start(ap, format);
526         vdprintf(fd, format, ap);
527         va_end(ap);
528
529         close_nointr_nofail(fd);
530
531         return 0;
532 }
533
/* Undoes setup_confirm_stdio(): releases the terminal, copies the saved
 * descriptors back onto stdin/stdout and closes the saved copies.
 * Descriptors that are negative are skipped.
 *
 * Returns 0 on success. If a dup2() fails, the negative errno-style
 * value of the last failing call is returned; the remaining steps are
 * carried out regardless. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        bool have_stdin, have_stdout;
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return r;
}
560
/* Asks on the console whether the command line given by argv shall be
 * executed. The answer character (one of "yns") is stored in *response
 * by ask().
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored afterwards, on every path, including when
 * building the command line string fails.
 *
 * Returns the result of ask(), -ENOMEM if the command line could not be
 * formatted, or the negative value reported by setup_confirm_stdio(). */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't bail out directly: stdio is still pointing at
                 * the console and the saved fds would leak */
                r = -ENOMEM;
                goto restore_stdio;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

restore_stdio:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         int r;
662         assert(context);
663
664         /* Sets (but doesn't lookup) the uid and make sure we keep the
665          * capabilities while doing so. */
666
667         if (context->capabilities) {
668                 cap_t d;
669                 static const cap_value_t bits[] = {
670                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
671                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
672                 };
673
674                 /* First step: If we need to keep capabilities but
675                  * drop privileges we need to make sure we keep our
676                  * caps, whiel we drop privileges. */
677                 if (uid != 0) {
678                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
679
680                         if (prctl(PR_GET_SECUREBITS) != sb)
681                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682                                         return -errno;
683                 }
684
685                 /* Second step: set the capabilities. This will reduce
686                  * the capabilities to the minimum we need. */
687
688                 if (!(d = cap_dup(context->capabilities)))
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
693                         r = -errno;
694                         cap_free(d);
695                         return r;
696                 }
697
698                 if (cap_set_proc(d) < 0) {
699                         r = -errno;
700                         cap_free(d);
701                         return r;
702                 }
703
704                 cap_free(d);
705         }
706
707         /* Third step: actually set the uids */
708         if (setresuid(uid, uid, uid) < 0)
709                 return -errno;
710
711         /* At this point we should have all necessary capabilities but
712            are otherwise a normal user. However, the caps might got
713            corrupted due to the setresuid() so we need clean them up
714            later. This is done outside of this call. */
715
716         return 0;
717 }
718
719 #ifdef HAVE_PAM
720
721 static int null_conv(
722                 int num_msg,
723                 const struct pam_message **msg,
724                 struct pam_response **resp,
725                 void *appdata_ptr) {
726
727         /* We don't support conversations */
728
729         return PAM_CONV_ERR;
730 }
731
732 static int setup_pam(
733                 const char *name,
734                 const char *user,
735                 uid_t uid,
736                 const char *tty,
737                 char ***pam_env,
738                 int fds[], unsigned n_fds) {
739
740         static const struct pam_conv conv = {
741                 .conv = null_conv,
742                 .appdata_ptr = NULL
743         };
744
745         pam_handle_t *handle = NULL;
746         sigset_t ss, old_ss;
747         int pam_code = PAM_SUCCESS;
748         int err;
749         char **e = NULL;
750         bool close_session = false;
751         pid_t pam_pid = 0, parent_pid;
752
753         assert(name);
754         assert(user);
755         assert(pam_env);
756
757         /* We set up PAM in the parent process, then fork. The child
758          * will then stay around until killed via PR_GET_PDEATHSIG or
759          * systemd via the cgroup logic. It will then remove the PAM
760          * session again. The parent process will exec() the actual
761          * daemon. We do things this way to ensure that the main PID
762          * of the daemon is the one we initially fork()ed. */
763
764         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
765                 handle = NULL;
766                 goto fail;
767         }
768
769         if (tty)
770                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
771                         goto fail;
772
773         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
774                 goto fail;
775
776         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
777                 goto fail;
778
779         close_session = true;
780
781         if ((!(e = pam_getenvlist(handle)))) {
782                 pam_code = PAM_BUF_ERR;
783                 goto fail;
784         }
785
786         /* Block SIGTERM, so that we know that it won't get lost in
787          * the child */
788         if (sigemptyset(&ss) < 0 ||
789             sigaddset(&ss, SIGTERM) < 0 ||
790             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
791                 goto fail;
792
793         parent_pid = getpid();
794
795         if ((pam_pid = fork()) < 0)
796                 goto fail;
797
798         if (pam_pid == 0) {
799                 int sig;
800                 int r = EXIT_PAM;
801
802                 /* The child's job is to reset the PAM session on
803                  * termination */
804
805                 /* This string must fit in 10 chars (i.e. the length
806                  * of "/sbin/init"), to look pretty in /bin/ps */
807                 rename_process("(sd-pam)");
808
809                 /* Make sure we don't keep open the passed fds in this
810                 child. We assume that otherwise only those fds are
811                 open here that have been opened by PAM. */
812                 close_many(fds, n_fds);
813
814                 /* Drop privileges - we don't need any to pam_close_session
815                  * and this will make PR_SET_PDEATHSIG work in most cases.
816                  * If this fails, ignore the error - but expect sd-pam threads
817                  * to fail to exit normally */
818                 if (setresuid(uid, uid, uid) < 0)
819                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
820
821                 /* Wait until our parent died. This will only work if
822                  * the above setresuid() succeeds, otherwise the kernel
823                  * will not allow unprivileged parents kill their privileged
824                  * children this way. We rely on the control groups kill logic
825                  * to do the rest for us. */
826                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
827                         goto child_finish;
828
829                 /* Check if our parent process might already have
830                  * died? */
831                 if (getppid() == parent_pid) {
832                         for (;;) {
833                                 if (sigwait(&ss, &sig) < 0) {
834                                         if (errno == EINTR)
835                                                 continue;
836
837                                         goto child_finish;
838                                 }
839
840                                 assert(sig == SIGTERM);
841                                 break;
842                         }
843                 }
844
845                 /* If our parent died we'll end the session */
846                 if (getppid() != parent_pid)
847                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
848                                 goto child_finish;
849
850                 r = 0;
851
852         child_finish:
853                 pam_end(handle, pam_code | PAM_DATA_SILENT);
854                 _exit(r);
855         }
856
857         /* If the child was forked off successfully it will do all the
858          * cleanups, so forget about the handle here. */
859         handle = NULL;
860
861         /* Unblock SIGTERM again in the parent */
862         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
863                 goto fail;
864
865         /* We close the log explicitly here, since the PAM modules
866          * might have opened it, but we don't want this fd around. */
867         closelog();
868
869         *pam_env = e;
870         e = NULL;
871
872         return 0;
873
874 fail:
875         if (pam_code != PAM_SUCCESS)
876                 err = -EPERM;  /* PAM errors do not map to errno */
877         else
878                 err = -errno;
879
880         if (handle) {
881                 if (close_session)
882                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
883
884                 pam_end(handle, pam_code | PAM_DATA_SILENT);
885         }
886
887         strv_free(e);
888
889         closelog();
890
891         if (pam_pid > 1) {
892                 kill(pam_pid, SIGTERM);
893                 kill(pam_pid, SIGCONT);
894         }
895
896         return err;
897 }
898 #endif
899
/* Renames the calling process after the file name component of path,
 * wrapped in parentheses. At most the last 8 characters of the file
 * name are used, so that the result never exceeds 10 characters (the
 * length of "/sbin/init") and looks pretty in /bin/ps. If the file name
 * is empty, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        enum { KEEP = 8 };
        char process_name[11];
        const char *base;
        char *out = process_name;
        size_t n;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > KEEP) {
                /* Keep the tail: it usually is the more telling part,
                 * as the head might just be "systemd-" */
                base += n - KEEP;
                n = KEEP;
        }

        *out++ = '(';
        memcpy(out, base, n);
        out += n;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
930
931 static int apply_seccomp(uint32_t *syscall_filter) {
932         static const struct sock_filter header[] = {
933                 VALIDATE_ARCHITECTURE,
934                 EXAMINE_SYSCALL
935         };
936         static const struct sock_filter footer[] = {
937                 _KILL_PROCESS
938         };
939
940         int i;
941         unsigned n;
942         struct sock_filter *f;
943         struct sock_fprog prog;
944
945         assert(syscall_filter);
946
947         /* First: count the syscalls to check for */
948         for (i = 0, n = 0; i < syscall_max(); i++)
949                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
950                         n++;
951
952         /* Second: build the filter program from a header the syscall
953          * matches and the footer */
954         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
955         memcpy(f, header, sizeof(header));
956
957         for (i = 0, n = 0; i < syscall_max(); i++)
958                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
959                         struct sock_filter item[] = {
960                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
961                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
962                         };
963
964                         assert_cc(ELEMENTSOF(item) == 2);
965
966                         f[ELEMENTSOF(header) + 2*n]  = item[0];
967                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
968
969                         n++;
970                 }
971
972         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
973
974         /* Third: install the filter */
975         zero(prog);
976         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
977         prog.filter = f;
978         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
979                 return -errno;
980
981         return 0;
982 }
983
984 int exec_spawn(ExecCommand *command,
985                char **argv,
986                const ExecContext *context,
987                int fds[], unsigned n_fds,
988                char **environment,
989                bool apply_permissions,
990                bool apply_chroot,
991                bool apply_tty_stdin,
992                bool confirm_spawn,
993                CGroupBonding *cgroup_bondings,
994                CGroupAttribute *cgroup_attributes,
995                const char *cgroup_suffix,
996                const char *unit_id,
997                int idle_pipe[2],
998                pid_t *ret) {
999
1000         pid_t pid;
1001         int r;
1002         char *line;
1003         int socket_fd;
1004         char _cleanup_strv_free_ **files_env = NULL;
1005
1006         assert(command);
1007         assert(context);
1008         assert(ret);
1009         assert(fds || n_fds <= 0);
1010
1011         if (context->std_input == EXEC_INPUT_SOCKET ||
1012             context->std_output == EXEC_OUTPUT_SOCKET ||
1013             context->std_error == EXEC_OUTPUT_SOCKET) {
1014
1015                 if (n_fds != 1)
1016                         return -EINVAL;
1017
1018                 socket_fd = fds[0];
1019
1020                 fds = NULL;
1021                 n_fds = 0;
1022         } else
1023                 socket_fd = -1;
1024
1025         r = exec_context_load_environment(context, &files_env);
1026         if (r < 0) {
1027                 log_struct(LOG_ERR,
1028                            "UNIT=%s", unit_id,
1029                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1030                            "ERRNO=%d", -r,
1031                            NULL);
1032                 return r;
1033         }
1034
1035         if (!argv)
1036                 argv = command->argv;
1037
1038         line = exec_command_line(argv);
1039         if (!line)
1040                 return log_oom();
1041
1042         log_struct(LOG_DEBUG,
1043                    "UNIT=%s", unit_id,
1044                    "MESSAGE=About to execute %s", line,
1045                    NULL);
1046         free(line);
1047
1048         r = cgroup_bonding_realize_list(cgroup_bondings);
1049         if (r < 0)
1050                 return r;
1051
1052         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1053
1054         pid = fork();
1055         if (pid < 0)
1056                 return -errno;
1057
1058         if (pid == 0) {
1059                 int i, err;
1060                 sigset_t ss;
1061                 const char *username = NULL, *home = NULL;
1062                 uid_t uid = (uid_t) -1;
1063                 gid_t gid = (gid_t) -1;
1064                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1065                         **final_env = NULL, **final_argv = NULL;
1066                 unsigned n_env = 0;
1067                 bool set_access = false;
1068
1069                 /* child */
1070
1071                 rename_process_from_path(command->path);
1072
1073                 /* We reset exactly these signals, since they are the
1074                  * only ones we set to SIG_IGN in the main daemon. All
1075                  * others we leave untouched because we set them to
1076                  * SIG_DFL or a valid handler initially, both of which
1077                  * will be demoted to SIG_DFL. */
1078                 default_signals(SIGNALS_CRASH_HANDLER,
1079                                 SIGNALS_IGNORE, -1);
1080
1081                 if (context->ignore_sigpipe)
1082                         ignore_signals(SIGPIPE, -1);
1083
1084                 assert_se(sigemptyset(&ss) == 0);
1085                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1086                         err = -errno;
1087                         r = EXIT_SIGNAL_MASK;
1088                         goto fail_child;
1089                 }
1090
1091                 if (idle_pipe) {
1092                         if (idle_pipe[1] >= 0)
1093                                 close_nointr_nofail(idle_pipe[1]);
1094                         if (idle_pipe[0] >= 0) {
1095                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1096                                 close_nointr_nofail(idle_pipe[0]);
1097                         }
1098                 }
1099
1100                 /* Close sockets very early to make sure we don't
1101                  * block init reexecution because it cannot bind its
1102                  * sockets */
1103                 log_forget_fds();
1104                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1105                                            socket_fd >= 0 ? 1 : n_fds);
1106                 if (err < 0) {
1107                         r = EXIT_FDS;
1108                         goto fail_child;
1109                 }
1110
1111                 if (!context->same_pgrp)
1112                         if (setsid() < 0) {
1113                                 err = -errno;
1114                                 r = EXIT_SETSID;
1115                                 goto fail_child;
1116                         }
1117
1118                 if (context->tcpwrap_name) {
1119                         if (socket_fd >= 0)
1120                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1121                                         err = -EACCES;
1122                                         r = EXIT_TCPWRAP;
1123                                         goto fail_child;
1124                                 }
1125
1126                         for (i = 0; i < (int) n_fds; i++) {
1127                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1128                                         err = -EACCES;
1129                                         r = EXIT_TCPWRAP;
1130                                         goto fail_child;
1131                                 }
1132                         }
1133                 }
1134
1135                 exec_context_tty_reset(context);
1136
1137                 if (confirm_spawn) {
1138                         char response;
1139
1140                         err = ask_for_confirmation(&response, argv);
1141                         if (err == -ETIMEDOUT)
1142                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1143                         else if (err < 0)
1144                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1145                         else if (response == 's') {
1146                                 write_confirm_message("Skipping execution.\n");
1147                                 err = -ECANCELED;
1148                                 r = EXIT_CONFIRM;
1149                                 goto fail_child;
1150                         } else if (response == 'n') {
1151                                 write_confirm_message("Failing execution.\n");
1152                                 err = r = 0;
1153                                 goto fail_child;
1154                         }
1155                 }
1156
1157                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1158                  * must sure to drop O_NONBLOCK */
1159                 if (socket_fd >= 0)
1160                         fd_nonblock(socket_fd, false);
1161
1162                 err = setup_input(context, socket_fd, apply_tty_stdin);
1163                 if (err < 0) {
1164                         r = EXIT_STDIN;
1165                         goto fail_child;
1166                 }
1167
1168                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1169                 if (err < 0) {
1170                         r = EXIT_STDOUT;
1171                         goto fail_child;
1172                 }
1173
1174                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1175                 if (err < 0) {
1176                         r = EXIT_STDERR;
1177                         goto fail_child;
1178                 }
1179
1180                 if (cgroup_bondings) {
1181                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1182                         if (err < 0) {
1183                                 r = EXIT_CGROUP;
1184                                 goto fail_child;
1185                         }
1186                 }
1187
1188                 if (context->oom_score_adjust_set) {
1189                         char t[16];
1190
1191                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1192                         char_array_0(t);
1193
1194                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1195                                 err = -errno;
1196                                 r = EXIT_OOM_ADJUST;
1197                                 goto fail_child;
1198                         }
1199                 }
1200
1201                 if (context->nice_set)
1202                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1203                                 err = -errno;
1204                                 r = EXIT_NICE;
1205                                 goto fail_child;
1206                         }
1207
1208                 if (context->cpu_sched_set) {
1209                         struct sched_param param;
1210
1211                         zero(param);
1212                         param.sched_priority = context->cpu_sched_priority;
1213
1214                         if (sched_setscheduler(0, context->cpu_sched_policy |
1215                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1216                                 err = -errno;
1217                                 r = EXIT_SETSCHEDULER;
1218                                 goto fail_child;
1219                         }
1220                 }
1221
1222                 if (context->cpuset)
1223                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1224                                 err = -errno;
1225                                 r = EXIT_CPUAFFINITY;
1226                                 goto fail_child;
1227                         }
1228
1229                 if (context->ioprio_set)
1230                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1231                                 err = -errno;
1232                                 r = EXIT_IOPRIO;
1233                                 goto fail_child;
1234                         }
1235
1236                 if (context->timer_slack_nsec != (nsec_t) -1)
1237                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1238                                 err = -errno;
1239                                 r = EXIT_TIMERSLACK;
1240                                 goto fail_child;
1241                         }
1242
1243                 if (context->utmp_id)
1244                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1245
1246                 if (context->user) {
1247                         username = context->user;
1248                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1249                         if (err < 0) {
1250                                 r = EXIT_USER;
1251                                 goto fail_child;
1252                         }
1253
1254                         if (is_terminal_input(context->std_input)) {
1255                                 err = chown_terminal(STDIN_FILENO, uid);
1256                                 if (err < 0) {
1257                                         r = EXIT_STDIN;
1258                                         goto fail_child;
1259                                 }
1260                         }
1261
1262                         if (cgroup_bondings && context->control_group_modify) {
1263                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1264                                 if (err >= 0)
1265                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1266                                 if (err < 0) {
1267                                         r = EXIT_CGROUP;
1268                                         goto fail_child;
1269                                 }
1270
1271                                 set_access = true;
1272                         }
1273                 }
1274
1275                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1276                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1277                         if (err < 0) {
1278                                 r = EXIT_CGROUP;
1279                                 goto fail_child;
1280                         }
1281                 }
1282
1283                 if (apply_permissions) {
1284                         err = enforce_groups(context, username, gid);
1285                         if (err < 0) {
1286                                 r = EXIT_GROUP;
1287                                 goto fail_child;
1288                         }
1289                 }
1290
1291                 umask(context->umask);
1292
1293 #ifdef HAVE_PAM
1294                 if (apply_permissions && context->pam_name && username) {
1295                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1296                         if (err < 0) {
1297                                 r = EXIT_PAM;
1298                                 goto fail_child;
1299                         }
1300                 }
1301 #endif
1302                 if (context->private_network) {
1303                         if (unshare(CLONE_NEWNET) < 0) {
1304                                 err = -errno;
1305                                 r = EXIT_NETWORK;
1306                                 goto fail_child;
1307                         }
1308
1309                         loopback_setup();
1310                 }
1311
1312                 if (strv_length(context->read_write_dirs) > 0 ||
1313                     strv_length(context->read_only_dirs) > 0 ||
1314                     strv_length(context->inaccessible_dirs) > 0 ||
1315                     context->mount_flags != 0 ||
1316                     context->private_tmp) {
1317                         err = setup_namespace(context->read_write_dirs,
1318                                               context->read_only_dirs,
1319                                               context->inaccessible_dirs,
1320                                               context->private_tmp,
1321                                               context->mount_flags);
1322                         if (err < 0) {
1323                                 r = EXIT_NAMESPACE;
1324                                 goto fail_child;
1325                         }
1326                 }
1327
1328                 if (apply_chroot) {
1329                         if (context->root_directory)
1330                                 if (chroot(context->root_directory) < 0) {
1331                                         err = -errno;
1332                                         r = EXIT_CHROOT;
1333                                         goto fail_child;
1334                                 }
1335
1336                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1337                                 err = -errno;
1338                                 r = EXIT_CHDIR;
1339                                 goto fail_child;
1340                         }
1341                 } else {
1342                         char _cleanup_free_ *d = NULL;
1343
1344                         if (asprintf(&d, "%s/%s",
1345                                      context->root_directory ? context->root_directory : "",
1346                                      context->working_directory ? context->working_directory : "") < 0) {
1347                                 err = -ENOMEM;
1348                                 r = EXIT_MEMORY;
1349                                 goto fail_child;
1350                         }
1351
1352                         if (chdir(d) < 0) {
1353                                 err = -errno;
1354                                 r = EXIT_CHDIR;
1355                                 goto fail_child;
1356                         }
1357                 }
1358
1359                 /* We repeat the fd closing here, to make sure that
1360                  * nothing is leaked from the PAM modules */
1361                 err = close_all_fds(fds, n_fds);
1362                 if (err >= 0)
1363                         err = shift_fds(fds, n_fds);
1364                 if (err >= 0)
1365                         err = flags_fds(fds, n_fds, context->non_blocking);
1366                 if (err < 0) {
1367                         r = EXIT_FDS;
1368                         goto fail_child;
1369                 }
1370
1371                 if (apply_permissions) {
1372
1373                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1374                                 if (!context->rlimit[i])
1375                                         continue;
1376
1377                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1378                                         err = -errno;
1379                                         r = EXIT_LIMITS;
1380                                         goto fail_child;
1381                                 }
1382                         }
1383
1384                         if (context->capability_bounding_set_drop) {
1385                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1386                                 if (err < 0) {
1387                                         r = EXIT_CAPABILITIES;
1388                                         goto fail_child;
1389                                 }
1390                         }
1391
1392                         if (context->user) {
1393                                 err = enforce_user(context, uid);
1394                                 if (err < 0) {
1395                                         r = EXIT_USER;
1396                                         goto fail_child;
1397                                 }
1398                         }
1399
1400                         /* PR_GET_SECUREBITS is not privileged, while
1401                          * PR_SET_SECUREBITS is. So to suppress
1402                          * potential EPERMs we'll try not to call
1403                          * PR_SET_SECUREBITS unless necessary. */
1404                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1405                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1406                                         err = -errno;
1407                                         r = EXIT_SECUREBITS;
1408                                         goto fail_child;
1409                                 }
1410
1411                         if (context->capabilities)
1412                                 if (cap_set_proc(context->capabilities) < 0) {
1413                                         err = -errno;
1414                                         r = EXIT_CAPABILITIES;
1415                                         goto fail_child;
1416                                 }
1417
1418                         if (context->no_new_privileges)
1419                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1420                                         err = -errno;
1421                                         r = EXIT_NO_NEW_PRIVILEGES;
1422                                         goto fail_child;
1423                                 }
1424
1425                         if (context->syscall_filter) {
1426                                 err = apply_seccomp(context->syscall_filter);
1427                                 if (err < 0) {
1428                                         r = EXIT_SECCOMP;
1429                                         goto fail_child;
1430                                 }
1431                         }
1432                 }
1433
1434                 if (!(our_env = new0(char*, 7))) {
1435                         err = -ENOMEM;
1436                         r = EXIT_MEMORY;
1437                         goto fail_child;
1438                 }
1439
1440                 if (n_fds > 0)
1441                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1442                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1443                                 err = -ENOMEM;
1444                                 r = EXIT_MEMORY;
1445                                 goto fail_child;
1446                         }
1447
1448                 if (home)
1449                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1450                                 err = -ENOMEM;
1451                                 r = EXIT_MEMORY;
1452                                 goto fail_child;
1453                         }
1454
1455                 if (username)
1456                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1457                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1458                                 err = -ENOMEM;
1459                                 r = EXIT_MEMORY;
1460                                 goto fail_child;
1461                         }
1462
1463                 if (is_terminal_input(context->std_input) ||
1464                     context->std_output == EXEC_OUTPUT_TTY ||
1465                     context->std_error == EXEC_OUTPUT_TTY)
1466                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1467                                 err = -ENOMEM;
1468                                 r = EXIT_MEMORY;
1469                                 goto fail_child;
1470                         }
1471
1472                 assert(n_env <= 7);
1473
1474                 if (!(final_env = strv_env_merge(
1475                                       5,
1476                                       environment,
1477                                       our_env,
1478                                       context->environment,
1479                                       files_env,
1480                                       pam_env,
1481                                       NULL))) {
1482                         err = -ENOMEM;
1483                         r = EXIT_MEMORY;
1484                         goto fail_child;
1485                 }
1486
1487                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1488                         err = -ENOMEM;
1489                         r = EXIT_MEMORY;
1490                         goto fail_child;
1491                 }
1492
1493                 final_env = strv_env_clean(final_env);
1494
1495                 execve(command->path, final_argv, final_env);
1496                 err = -errno;
1497                 r = EXIT_EXEC;
1498
1499         fail_child:
1500                 if (r != 0) {
1501                         log_open();
1502                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1503                                    "EXECUTABLE=%s", command->path,
1504                                    "MESSAGE=Failed at step %s spawning %s: %s",
1505                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1506                                           command->path, strerror(-err),
1507                                    "ERRNO=%d", -err,
1508                                    NULL);
1509                         log_close();
1510                 }
1511
1512                 _exit(r);
1513         }
1514
1515         log_struct(LOG_DEBUG,
1516                    "UNIT=%s", unit_id,
1517                    "MESSAGE=Forked %s as %lu",
1518                           command->path, (unsigned long) pid,
1519                    NULL);
1520
1521         /* We add the new process to the cgroup both in the child (so
1522          * that we can be sure that no user code is ever executed
1523          * outside of the cgroup) and in the parent (so that we can be
1524          * sure that when we kill the cgroup the process will be
1525          * killed too). */
1526         if (cgroup_bondings)
1527                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1528
1529         exec_status_start(&command->exec_status, pid);
1530
1531         *ret = pid;
1532         return 0;
1533 }
1534
1535 void exec_context_init(ExecContext *c) {
1536         assert(c);
1537
1538         c->umask = 0022;
1539         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540         c->cpu_sched_policy = SCHED_OTHER;
1541         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542         c->syslog_level_prefix = true;
1543         c->control_group_persistent = -1;
1544         c->ignore_sigpipe = true;
1545         c->timer_slack_nsec = (nsec_t) -1;
1546 }
1547
1548 void exec_context_done(ExecContext *c) {
1549         unsigned l;
1550
1551         assert(c);
1552
1553         strv_free(c->environment);
1554         c->environment = NULL;
1555
1556         strv_free(c->environment_files);
1557         c->environment_files = NULL;
1558
1559         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1560                 free(c->rlimit[l]);
1561                 c->rlimit[l] = NULL;
1562         }
1563
1564         free(c->working_directory);
1565         c->working_directory = NULL;
1566         free(c->root_directory);
1567         c->root_directory = NULL;
1568
1569         free(c->tty_path);
1570         c->tty_path = NULL;
1571
1572         free(c->tcpwrap_name);
1573         c->tcpwrap_name = NULL;
1574
1575         free(c->syslog_identifier);
1576         c->syslog_identifier = NULL;
1577
1578         free(c->user);
1579         c->user = NULL;
1580
1581         free(c->group);
1582         c->group = NULL;
1583
1584         strv_free(c->supplementary_groups);
1585         c->supplementary_groups = NULL;
1586
1587         free(c->pam_name);
1588         c->pam_name = NULL;
1589
1590         if (c->capabilities) {
1591                 cap_free(c->capabilities);
1592                 c->capabilities = NULL;
1593         }
1594
1595         strv_free(c->read_only_dirs);
1596         c->read_only_dirs = NULL;
1597
1598         strv_free(c->read_write_dirs);
1599         c->read_write_dirs = NULL;
1600
1601         strv_free(c->inaccessible_dirs);
1602         c->inaccessible_dirs = NULL;
1603
1604         if (c->cpuset)
1605                 CPU_FREE(c->cpuset);
1606
1607         free(c->utmp_id);
1608         c->utmp_id = NULL;
1609
1610         free(c->syscall_filter);
1611         c->syscall_filter = NULL;
1612 }
1613
1614 void exec_command_done(ExecCommand *c) {
1615         assert(c);
1616
1617         free(c->path);
1618         c->path = NULL;
1619
1620         strv_free(c->argv);
1621         c->argv = NULL;
1622 }
1623
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1625         unsigned i;
1626
1627         for (i = 0; i < n; i++)
1628                 exec_command_done(c+i);
1629 }
1630
1631 void exec_command_free_list(ExecCommand *c) {
1632         ExecCommand *i;
1633
1634         while ((i = c)) {
1635                 LIST_REMOVE(ExecCommand, command, c, i);
1636                 exec_command_done(i);
1637                 free(i);
1638         }
1639 }
1640
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1642         unsigned i;
1643
1644         for (i = 0; i < n; i++) {
1645                 exec_command_free_list(c[i]);
1646                 c[i] = NULL;
1647         }
1648 }
1649
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651         char **i, **r = NULL;
1652
1653         assert(c);
1654         assert(l);
1655
1656         STRV_FOREACH(i, c->environment_files) {
1657                 char *fn;
1658                 int k;
1659                 bool ignore = false;
1660                 char **p;
1661                 glob_t pglob;
1662                 int count, n;
1663
1664                 fn = *i;
1665
1666                 if (fn[0] == '-') {
1667                         ignore = true;
1668                         fn ++;
1669                 }
1670
1671                 if (!path_is_absolute(fn)) {
1672
1673                         if (ignore)
1674                                 continue;
1675
1676                         strv_free(r);
1677                         return -EINVAL;
1678                 }
1679
1680                 /* Filename supports globbing, take all matching files */
1681                 zero(pglob);
1682                 errno = 0;
1683                 if (glob(fn, 0, NULL, &pglob) != 0) {
1684                         globfree(&pglob);
1685                         if (ignore)
1686                                 continue;
1687
1688                         strv_free(r);
1689                         return errno ? -errno : -EINVAL;
1690                 }
1691                 count = pglob.gl_pathc;
1692                 if (count == 0) {
1693                         globfree(&pglob);
1694                         if (ignore)
1695                                 continue;
1696
1697                         strv_free(r);
1698                         return -EINVAL;
1699                 }
1700                 for (n = 0; n < count; n++) {
1701                         k = load_env_file(pglob.gl_pathv[n], &p);
1702                         if (k < 0) {
1703                                 if (ignore)
1704                                         continue;
1705
1706                                 strv_free(r);
1707                                 globfree(&pglob);
1708                                 return k;
1709                          }
1710
1711                         if (r == NULL)
1712                                 r = p;
1713                         else {
1714                                 char **m;
1715
1716                                 m = strv_env_merge(2, r, p);
1717                                 strv_free(r);
1718                                 strv_free(p);
1719
1720                                 if (!m) {
1721                                         globfree(&pglob);
1722                                         return -ENOMEM;
1723                                 }
1724
1725                                 r = m;
1726                         }
1727                 }
1728                 globfree(&pglob);
1729         }
1730
1731         *l = r;
1732
1733         return 0;
1734 }
1735
1736 static void strv_fprintf(FILE *f, char **l) {
1737         char **g;
1738
1739         assert(f);
1740
1741         STRV_FOREACH(g, l)
1742                 fprintf(f, " %s", *g);
1743 }
1744
1745 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1746         char ** e;
1747         unsigned i;
1748
1749         assert(c);
1750         assert(f);
1751
1752         if (!prefix)
1753                 prefix = "";
1754
1755         fprintf(f,
1756                 "%sUMask: %04o\n"
1757                 "%sWorkingDirectory: %s\n"
1758                 "%sRootDirectory: %s\n"
1759                 "%sNonBlocking: %s\n"
1760                 "%sPrivateTmp: %s\n"
1761                 "%sControlGroupModify: %s\n"
1762                 "%sControlGroupPersistent: %s\n"
1763                 "%sPrivateNetwork: %s\n"
1764                 "%sIgnoreSIGPIPE: %s\n",
1765                 prefix, c->umask,
1766                 prefix, c->working_directory ? c->working_directory : "/",
1767                 prefix, c->root_directory ? c->root_directory : "/",
1768                 prefix, yes_no(c->non_blocking),
1769                 prefix, yes_no(c->private_tmp),
1770                 prefix, yes_no(c->control_group_modify),
1771                 prefix, yes_no(c->control_group_persistent),
1772                 prefix, yes_no(c->private_network),
1773                 prefix, yes_no(c->ignore_sigpipe));
1774
1775         STRV_FOREACH(e, c->environment)
1776                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1777
1778         STRV_FOREACH(e, c->environment_files)
1779                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1780
1781         if (c->tcpwrap_name)
1782                 fprintf(f,
1783                         "%sTCPWrapName: %s\n",
1784                         prefix, c->tcpwrap_name);
1785
1786         if (c->nice_set)
1787                 fprintf(f,
1788                         "%sNice: %i\n",
1789                         prefix, c->nice);
1790
1791         if (c->oom_score_adjust_set)
1792                 fprintf(f,
1793                         "%sOOMScoreAdjust: %i\n",
1794                         prefix, c->oom_score_adjust);
1795
1796         for (i = 0; i < RLIM_NLIMITS; i++)
1797                 if (c->rlimit[i])
1798                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1799
1800         if (c->ioprio_set) {
1801                 char *class_str;
1802                 int r;
1803
1804                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1805                 if (r < 0)
1806                         class_str = NULL;
1807                 fprintf(f,
1808                         "%sIOSchedulingClass: %s\n"
1809                         "%sIOPriority: %i\n",
1810                         prefix, strna(class_str),
1811                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1812                 free(class_str);
1813         }
1814
1815         if (c->cpu_sched_set) {
1816                 char *policy_str;
1817                 int r;
1818
1819                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1820                 if (r < 0)
1821                         policy_str = NULL;
1822                 fprintf(f,
1823                         "%sCPUSchedulingPolicy: %s\n"
1824                         "%sCPUSchedulingPriority: %i\n"
1825                         "%sCPUSchedulingResetOnFork: %s\n",
1826                         prefix, strna(policy_str),
1827                         prefix, c->cpu_sched_priority,
1828                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1829                 free(policy_str);
1830         }
1831
1832         if (c->cpuset) {
1833                 fprintf(f, "%sCPUAffinity:", prefix);
1834                 for (i = 0; i < c->cpuset_ncpus; i++)
1835                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1836                                 fprintf(f, " %i", i);
1837                 fputs("\n", f);
1838         }
1839
1840         if (c->timer_slack_nsec != (nsec_t) -1)
1841                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1842
1843         fprintf(f,
1844                 "%sStandardInput: %s\n"
1845                 "%sStandardOutput: %s\n"
1846                 "%sStandardError: %s\n",
1847                 prefix, exec_input_to_string(c->std_input),
1848                 prefix, exec_output_to_string(c->std_output),
1849                 prefix, exec_output_to_string(c->std_error));
1850
1851         if (c->tty_path)
1852                 fprintf(f,
1853                         "%sTTYPath: %s\n"
1854                         "%sTTYReset: %s\n"
1855                         "%sTTYVHangup: %s\n"
1856                         "%sTTYVTDisallocate: %s\n",
1857                         prefix, c->tty_path,
1858                         prefix, yes_no(c->tty_reset),
1859                         prefix, yes_no(c->tty_vhangup),
1860                         prefix, yes_no(c->tty_vt_disallocate));
1861
1862         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1863             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1864             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1865             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1866                 char *fac_str, *lvl_str;
1867                 int r;
1868
1869                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1870                 if (r < 0)
1871                         fac_str = NULL;
1872
1873                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1874                 if (r < 0)
1875                         lvl_str = NULL;
1876
1877                 fprintf(f,
1878                         "%sSyslogFacility: %s\n"
1879                         "%sSyslogLevel: %s\n",
1880                         prefix, strna(fac_str),
1881                         prefix, strna(lvl_str));
1882                 free(lvl_str);
1883                 free(fac_str);
1884         }
1885
1886         if (c->capabilities) {
1887                 char *t;
1888                 if ((t = cap_to_text(c->capabilities, NULL))) {
1889                         fprintf(f, "%sCapabilities: %s\n",
1890                                 prefix, t);
1891                         cap_free(t);
1892                 }
1893         }
1894
1895         if (c->secure_bits)
1896                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1897                         prefix,
1898                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1899                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1900                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1901                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1902                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1903                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1904
1905         if (c->capability_bounding_set_drop) {
1906                 unsigned long l;
1907                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1908
1909                 for (l = 0; l <= cap_last_cap(); l++)
1910                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1911                                 char *t;
1912
1913                                 if ((t = cap_to_name(l))) {
1914                                         fprintf(f, " %s", t);
1915                                         cap_free(t);
1916                                 }
1917                         }
1918
1919                 fputs("\n", f);
1920         }
1921
1922         if (c->user)
1923                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1924         if (c->group)
1925                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1926
1927         if (strv_length(c->supplementary_groups) > 0) {
1928                 fprintf(f, "%sSupplementaryGroups:", prefix);
1929                 strv_fprintf(f, c->supplementary_groups);
1930                 fputs("\n", f);
1931         }
1932
1933         if (c->pam_name)
1934                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1935
1936         if (strv_length(c->read_write_dirs) > 0) {
1937                 fprintf(f, "%sReadWriteDirs:", prefix);
1938                 strv_fprintf(f, c->read_write_dirs);
1939                 fputs("\n", f);
1940         }
1941
1942         if (strv_length(c->read_only_dirs) > 0) {
1943                 fprintf(f, "%sReadOnlyDirs:", prefix);
1944                 strv_fprintf(f, c->read_only_dirs);
1945                 fputs("\n", f);
1946         }
1947
1948         if (strv_length(c->inaccessible_dirs) > 0) {
1949                 fprintf(f, "%sInaccessibleDirs:", prefix);
1950                 strv_fprintf(f, c->inaccessible_dirs);
1951                 fputs("\n", f);
1952         }
1953
1954         if (c->utmp_id)
1955                 fprintf(f,
1956                         "%sUtmpIdentifier: %s\n",
1957                         prefix, c->utmp_id);
1958 }
1959
1960 void exec_status_start(ExecStatus *s, pid_t pid) {
1961         assert(s);
1962
1963         zero(*s);
1964         s->pid = pid;
1965         dual_timestamp_get(&s->start_timestamp);
1966 }
1967
1968 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1969         assert(s);
1970
1971         if (s->pid && s->pid != pid)
1972                 zero(*s);
1973
1974         s->pid = pid;
1975         dual_timestamp_get(&s->exit_timestamp);
1976
1977         s->code = code;
1978         s->status = status;
1979
1980         if (context) {
1981                 if (context->utmp_id)
1982                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1983
1984                 exec_context_tty_reset(context);
1985         }
1986 }
1987
1988 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1989         char buf[FORMAT_TIMESTAMP_MAX];
1990
1991         assert(s);
1992         assert(f);
1993
1994         if (!prefix)
1995                 prefix = "";
1996
1997         if (s->pid <= 0)
1998                 return;
1999
2000         fprintf(f,
2001                 "%sPID: %lu\n",
2002                 prefix, (unsigned long) s->pid);
2003
2004         if (s->start_timestamp.realtime > 0)
2005                 fprintf(f,
2006                         "%sStart Timestamp: %s\n",
2007                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2008
2009         if (s->exit_timestamp.realtime > 0)
2010                 fprintf(f,
2011                         "%sExit Timestamp: %s\n"
2012                         "%sExit Code: %s\n"
2013                         "%sExit Status: %i\n",
2014                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2015                         prefix, sigchld_code_to_string(s->code),
2016                         prefix, s->status);
2017 }
2018
2019 char *exec_command_line(char **argv) {
2020         size_t k;
2021         char *n, *p, **a;
2022         bool first = true;
2023
2024         assert(argv);
2025
2026         k = 1;
2027         STRV_FOREACH(a, argv)
2028                 k += strlen(*a)+3;
2029
2030         if (!(n = new(char, k)))
2031                 return NULL;
2032
2033         p = n;
2034         STRV_FOREACH(a, argv) {
2035
2036                 if (!first)
2037                         *(p++) = ' ';
2038                 else
2039                         first = false;
2040
2041                 if (strpbrk(*a, WHITESPACE)) {
2042                         *(p++) = '\'';
2043                         p = stpcpy(p, *a);
2044                         *(p++) = '\'';
2045                 } else
2046                         p = stpcpy(p, *a);
2047
2048         }
2049
2050         *p = 0;
2051
2052         /* FIXME: this doesn't really handle arguments that have
2053          * spaces and ticks in them */
2054
2055         return n;
2056 }
2057
2058 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2059         char *p2;
2060         const char *prefix2;
2061
2062         char *cmd;
2063
2064         assert(c);
2065         assert(f);
2066
2067         if (!prefix)
2068                 prefix = "";
2069         p2 = strappend(prefix, "\t");
2070         prefix2 = p2 ? p2 : prefix;
2071
2072         cmd = exec_command_line(c->argv);
2073
2074         fprintf(f,
2075                 "%sCommand Line: %s\n",
2076                 prefix, cmd ? cmd : strerror(ENOMEM));
2077
2078         free(cmd);
2079
2080         exec_status_dump(&c->exec_status, f, prefix2);
2081
2082         free(p2);
2083 }
2084
2085 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2086         assert(f);
2087
2088         if (!prefix)
2089                 prefix = "";
2090
2091         LIST_FOREACH(command, c, c)
2092                 exec_command_dump(c, f, prefix);
2093 }
2094
2095 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2096         ExecCommand *end;
2097
2098         assert(l);
2099         assert(e);
2100
2101         if (*l) {
2102                 /* It's kind of important, that we keep the order here */
2103                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2104                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2105         } else
2106               *l = e;
2107 }
2108
2109 int exec_command_set(ExecCommand *c, const char *path, ...) {
2110         va_list ap;
2111         char **l, *p;
2112
2113         assert(c);
2114         assert(path);
2115
2116         va_start(ap, path);
2117         l = strv_new_ap(path, ap);
2118         va_end(ap);
2119
2120         if (!l)
2121                 return -ENOMEM;
2122
2123         if (!(p = strdup(path))) {
2124                 strv_free(l);
2125                 return -ENOMEM;
2126         }
2127
2128         free(c->path);
2129         c->path = p;
2130
2131         strv_free(c->argv);
2132         c->argv = l;
2133
2134         return 0;
2135 }
2136
2137 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2138         [EXEC_INPUT_NULL] = "null",
2139         [EXEC_INPUT_TTY] = "tty",
2140         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2141         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2142         [EXEC_INPUT_SOCKET] = "socket"
2143 };
2144
2145 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2146
2147 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2148         [EXEC_OUTPUT_INHERIT] = "inherit",
2149         [EXEC_OUTPUT_NULL] = "null",
2150         [EXEC_OUTPUT_TTY] = "tty",
2151         [EXEC_OUTPUT_SYSLOG] = "syslog",
2152         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2153         [EXEC_OUTPUT_KMSG] = "kmsg",
2154         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2155         [EXEC_OUTPUT_JOURNAL] = "journal",
2156         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2157         [EXEC_OUTPUT_SOCKET] = "socket"
2158 };
2159
2160 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);