chiark / gitweb /
env: considerably beef up environment cleaning logic
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68
69 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
70
71 /* This assumes there is a 'tty' group */
72 #define TTY_MODE 0620
73
/* Moves the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. directly behind stdin/stdout/stderr. The array is
 * updated in place with the new descriptor numbers. Returns 0 on
 * success or a negative errno-style value if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx, retry = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int got;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* The wanted slot was still occupied. Remember
                         * the first such index so the next pass starts
                         * there. */
                        if (got != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (first >= 0);

        return 0;
}
117
/* Applies the requested O_NONBLOCK state to each of the passed fds
 * and clears FD_CLOEXEC on all of them. Returns 0 on success, or the
 * negative value reported by the first helper call that fails. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant
                 * to be passed on to our children. */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
144
145 static const char *tty_path(const ExecContext *context) {
146         assert(context);
147
148         if (context->tty_path)
149                 return context->tty_path;
150
151         return "/dev/console";
152 }
153
154 void exec_context_tty_reset(const ExecContext *context) {
155         assert(context);
156
157         if (context->tty_vhangup)
158                 terminal_vhangup(tty_path(context));
159
160         if (context->tty_reset)
161                 reset_terminal(tty_path(context));
162
163         if (context->tty_vt_disallocate && context->tty_path)
164                 vt_disallocate(context->tty_path);
165 }
166
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and
 * makes it available as file descriptor nfd. Returns nfd on success
 * or a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r = nfd;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Lucky case: the kernel already gave us the wanted number */
        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                r = -errno;

        close_nointr_nofail(fd);

        return r;
}
183
184 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
185         int fd, r;
186         union sockaddr_union sa;
187
188         assert(context);
189         assert(output < _EXEC_OUTPUT_MAX);
190         assert(ident);
191         assert(nfd >= 0);
192
193         fd = socket(AF_UNIX, SOCK_STREAM, 0);
194         if (fd < 0)
195                 return -errno;
196
197         zero(sa);
198         sa.un.sun_family = AF_UNIX;
199         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
200
201         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
202         if (r < 0) {
203                 close_nointr_nofail(fd);
204                 return -errno;
205         }
206
207         if (shutdown(fd, SHUT_RD) < 0) {
208                 close_nointr_nofail(fd);
209                 return -errno;
210         }
211
212         dprintf(fd,
213                 "%s\n"
214                 "%s\n"
215                 "%i\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n"
219                 "%i\n",
220                 context->syslog_identifier ? context->syslog_identifier : ident,
221                 unit_id,
222                 context->syslog_priority,
223                 !!context->syslog_level_prefix,
224                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
225                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
226                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
227
228         if (fd != nfd) {
229                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
230                 close_nointr_nofail(fd);
231         } else
232                 r = nfd;
233
234         return r;
235 }
/* Opens the terminal at path with the given open flags (plus
 * O_NOCTTY) and makes it available as file descriptor nfd. Returns
 * nfd on success or a negative value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r = nfd;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        if (dup2(fd, nfd) < 0)
                r = -errno;

        close_nointr_nofail(fd);

        return r;
}
253
254 static bool is_terminal_input(ExecInput i) {
255         return
256                 i == EXEC_INPUT_TTY ||
257                 i == EXEC_INPUT_TTY_FORCE ||
258                 i == EXEC_INPUT_TTY_FAIL;
259 }
260
261 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
262
263         if (is_terminal_input(std_input) && !apply_tty_stdin)
264                 return EXEC_INPUT_NULL;
265
266         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
267                 return EXEC_INPUT_NULL;
268
269         return std_input;
270 }
271
272 static int fixup_output(ExecOutput std_output, int socket_fd) {
273
274         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
275                 return EXEC_OUTPUT_INHERIT;
276
277         return std_output;
278 }
279
280 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
281         ExecInput i;
282
283         assert(context);
284
285         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
286
287         switch (i) {
288
289         case EXEC_INPUT_NULL:
290                 return open_null_as(O_RDONLY, STDIN_FILENO);
291
292         case EXEC_INPUT_TTY:
293         case EXEC_INPUT_TTY_FORCE:
294         case EXEC_INPUT_TTY_FAIL: {
295                 int fd, r;
296
297                 if ((fd = acquire_terminal(
298                                      tty_path(context),
299                                      i == EXEC_INPUT_TTY_FAIL,
300                                      i == EXEC_INPUT_TTY_FORCE,
301                                      false,
302                                      (usec_t) -1)) < 0)
303                         return fd;
304
305                 if (fd != STDIN_FILENO) {
306                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
307                         close_nointr_nofail(fd);
308                 } else
309                         r = STDIN_FILENO;
310
311                 return r;
312         }
313
314         case EXEC_INPUT_SOCKET:
315                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
316
317         default:
318                 assert_not_reached("Unknown input type");
319         }
320 }
321
322 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
323         ExecOutput o;
324         ExecInput i;
325
326         assert(context);
327         assert(ident);
328
329         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
330         o = fixup_output(context->std_output, socket_fd);
331
332         /* This expects the input is already set up */
333
334         switch (o) {
335
336         case EXEC_OUTPUT_INHERIT:
337
338                 /* If input got downgraded, inherit the original value */
339                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
340                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
341
342                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
343                 if (i != EXEC_INPUT_NULL)
344                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
345
346                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
347                 if (getppid() != 1)
348                         return STDOUT_FILENO;
349
350                 /* We need to open /dev/null here anew, to get the
351                  * right access mode. So we fall through */
352
353         case EXEC_OUTPUT_NULL:
354                 return open_null_as(O_WRONLY, STDOUT_FILENO);
355
356         case EXEC_OUTPUT_TTY:
357                 if (is_terminal_input(i))
358                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
359
360                 /* We don't reset the terminal if this is just about output */
361                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
362
363         case EXEC_OUTPUT_SYSLOG:
364         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
365         case EXEC_OUTPUT_KMSG:
366         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
367         case EXEC_OUTPUT_JOURNAL:
368         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
369                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
370
371         case EXEC_OUTPUT_SOCKET:
372                 assert(socket_fd >= 0);
373                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
374
375         default:
376                 assert_not_reached("Unknown output type");
377         }
378 }
379
380 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
381         ExecOutput o, e;
382         ExecInput i;
383
384         assert(context);
385         assert(ident);
386
387         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
388         o = fixup_output(context->std_output, socket_fd);
389         e = fixup_output(context->std_error, socket_fd);
390
391         /* This expects the input and output are already set up */
392
393         /* Don't change the stderr file descriptor if we inherit all
394          * the way and are not on a tty */
395         if (e == EXEC_OUTPUT_INHERIT &&
396             o == EXEC_OUTPUT_INHERIT &&
397             i == EXEC_INPUT_NULL &&
398             !is_terminal_input(context->std_input) &&
399             getppid () != 1)
400                 return STDERR_FILENO;
401
402         /* Duplicate from stdout if possible */
403         if (e == o || e == EXEC_OUTPUT_INHERIT)
404                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
405
406         switch (e) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, STDERR_FILENO);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
425
426         case EXEC_OUTPUT_SOCKET:
427                 assert(socket_fd >= 0);
428                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
429
430         default:
431                 assert_not_reached("Unknown error type");
432         }
433 }
434
435 static int chown_terminal(int fd, uid_t uid) {
436         struct stat st;
437
438         assert(fd >= 0);
439
440         /* This might fail. What matters are the results. */
441         (void) fchown(fd, uid, -1);
442         (void) fchmod(fd, TTY_MODE);
443
444         if (fstat(fd, &st) < 0)
445                 return -errno;
446
447         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
448                 return -EPERM;
449
450         return 0;
451 }
452
453 static int setup_confirm_stdio(int *_saved_stdin,
454                                int *_saved_stdout) {
455         int fd = -1, saved_stdin, saved_stdout = -1, r;
456
457         assert(_saved_stdin);
458         assert(_saved_stdout);
459
460         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
461         if (saved_stdin < 0)
462                 return -errno;
463
464         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
465         if (saved_stdout < 0) {
466                 r = errno;
467                 goto fail;
468         }
469
470         fd = acquire_terminal(
471                         "/dev/console",
472                         false,
473                         false,
474                         false,
475                         DEFAULT_CONFIRM_USEC);
476         if (fd < 0) {
477                 r = fd;
478                 goto fail;
479         }
480
481         r = chown_terminal(fd, getuid());
482         if (r < 0)
483                 goto fail;
484
485         if (dup2(fd, STDIN_FILENO) < 0) {
486                 r = -errno;
487                 goto fail;
488         }
489
490         if (dup2(fd, STDOUT_FILENO) < 0) {
491                 r = -errno;
492                 goto fail;
493         }
494
495         if (fd >= 2)
496                 close_nointr_nofail(fd);
497
498         *_saved_stdin = saved_stdin;
499         *_saved_stdout = saved_stdout;
500
501         return 0;
502
503 fail:
504         if (saved_stdout >= 0)
505                 close_nointr_nofail(saved_stdout);
506
507         if (saved_stdin >= 0)
508                 close_nointr_nofail(saved_stdin);
509
510         if (fd >= 0)
511                 close_nointr_nofail(fd);
512
513         return r;
514 }
515
516 static int write_confirm_message(const char *format, ...) {
517         int fd;
518         va_list ap;
519
520         assert(format);
521
522         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
523         if (fd < 0)
524                 return fd;
525
526         va_start(ap, format);
527         vdprintf(fd, format, ap);
528         va_end(ap);
529
530         close_nointr_nofail(fd);
531
532         return 0;
533 }
534
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin and stdout, and closes the saved
 * copies. Saved values < 0 are skipped. Returns 0 on success; if a
 * dup2() fails, the negative errno-style value of the last failure is
 * returned, but the remaining steps are still carried out. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int in_fd, out_fd, r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                r = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                r = -errno;

        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return r;
}
561
/* Asks the user on the console whether the command line given by argv
 * shall be executed. The answer (one of the characters in "yns") is
 * stored in *response by ask(). Returns the result of ask(), or a
 * negative errno-style value if the console cannot be set up or
 * memory is short. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdin/stdout point to the console now, so they have
                 * to be restored on this path as well. */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
581
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583         bool keep_groups = false;
584         int r;
585
586         assert(context);
587
588         /* Lookup and set GID and supplementary group list. Here too
589          * we avoid NSS lookups for gid=0. */
590
591         if (context->group || username) {
592
593                 if (context->group) {
594                         const char *g = context->group;
595
596                         if ((r = get_group_creds(&g, &gid)) < 0)
597                                 return r;
598                 }
599
600                 /* First step, initialize groups from /etc/groups */
601                 if (username && gid != 0) {
602                         if (initgroups(username, gid) < 0)
603                                 return -errno;
604
605                         keep_groups = true;
606                 }
607
608                 /* Second step, set our gids */
609                 if (setresgid(gid, gid, gid) < 0)
610                         return -errno;
611         }
612
613         if (context->supplementary_groups) {
614                 int ngroups_max, k;
615                 gid_t *gids;
616                 char **i;
617
618                 /* Final step, initialize any manually set supplementary groups */
619                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620
621                 if (!(gids = new(gid_t, ngroups_max)))
622                         return -ENOMEM;
623
624                 if (keep_groups) {
625                         if ((k = getgroups(ngroups_max, gids)) < 0) {
626                                 free(gids);
627                                 return -errno;
628                         }
629                 } else
630                         k = 0;
631
632                 STRV_FOREACH(i, context->supplementary_groups) {
633                         const char *g;
634
635                         if (k >= ngroups_max) {
636                                 free(gids);
637                                 return -E2BIG;
638                         }
639
640                         g = *i;
641                         r = get_group_creds(&g, gids+k);
642                         if (r < 0) {
643                                 free(gids);
644                                 return r;
645                         }
646
647                         k++;
648                 }
649
650                 if (setgroups(k, gids) < 0) {
651                         free(gids);
652                         return -errno;
653                 }
654
655                 free(gids);
656         }
657
658         return 0;
659 }
660
661 static int enforce_user(const ExecContext *context, uid_t uid) {
662         int r;
663         assert(context);
664
665         /* Sets (but doesn't lookup) the uid and make sure we keep the
666          * capabilities while doing so. */
667
668         if (context->capabilities) {
669                 cap_t d;
670                 static const cap_value_t bits[] = {
671                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
672                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
673                 };
674
675                 /* First step: If we need to keep capabilities but
676                  * drop privileges we need to make sure we keep our
677                  * caps, whiel we drop privileges. */
678                 if (uid != 0) {
679                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
680
681                         if (prctl(PR_GET_SECUREBITS) != sb)
682                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
683                                         return -errno;
684                 }
685
686                 /* Second step: set the capabilities. This will reduce
687                  * the capabilities to the minimum we need. */
688
689                 if (!(d = cap_dup(context->capabilities)))
690                         return -errno;
691
692                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
694                         r = -errno;
695                         cap_free(d);
696                         return r;
697                 }
698
699                 if (cap_set_proc(d) < 0) {
700                         r = -errno;
701                         cap_free(d);
702                         return r;
703                 }
704
705                 cap_free(d);
706         }
707
708         /* Third step: actually set the uids */
709         if (setresuid(uid, uid, uid) < 0)
710                 return -errno;
711
712         /* At this point we should have all necessary capabilities but
713            are otherwise a normal user. However, the caps might got
714            corrupted due to the setresuid() so we need clean them up
715            later. This is done outside of this call. */
716
717         return 0;
718 }
719
720 #ifdef HAVE_PAM
721
722 static int null_conv(
723                 int num_msg,
724                 const struct pam_message **msg,
725                 struct pam_response **resp,
726                 void *appdata_ptr) {
727
728         /* We don't support conversations */
729
730         return PAM_CONV_ERR;
731 }
732
733 static int setup_pam(
734                 const char *name,
735                 const char *user,
736                 uid_t uid,
737                 const char *tty,
738                 char ***pam_env,
739                 int fds[], unsigned n_fds) {
740
741         static const struct pam_conv conv = {
742                 .conv = null_conv,
743                 .appdata_ptr = NULL
744         };
745
746         pam_handle_t *handle = NULL;
747         sigset_t ss, old_ss;
748         int pam_code = PAM_SUCCESS;
749         int err;
750         char **e = NULL;
751         bool close_session = false;
752         pid_t pam_pid = 0, parent_pid;
753
754         assert(name);
755         assert(user);
756         assert(pam_env);
757
758         /* We set up PAM in the parent process, then fork. The child
759          * will then stay around until killed via PR_GET_PDEATHSIG or
760          * systemd via the cgroup logic. It will then remove the PAM
761          * session again. The parent process will exec() the actual
762          * daemon. We do things this way to ensure that the main PID
763          * of the daemon is the one we initially fork()ed. */
764
765         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
766                 handle = NULL;
767                 goto fail;
768         }
769
770         if (tty)
771                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
772                         goto fail;
773
774         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
775                 goto fail;
776
777         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         if ((!(e = pam_getenvlist(handle)))) {
783                 pam_code = PAM_BUF_ERR;
784                 goto fail;
785         }
786
787         /* Block SIGTERM, so that we know that it won't get lost in
788          * the child */
789         if (sigemptyset(&ss) < 0 ||
790             sigaddset(&ss, SIGTERM) < 0 ||
791             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
792                 goto fail;
793
794         parent_pid = getpid();
795
796         if ((pam_pid = fork()) < 0)
797                 goto fail;
798
799         if (pam_pid == 0) {
800                 int sig;
801                 int r = EXIT_PAM;
802
803                 /* The child's job is to reset the PAM session on
804                  * termination */
805
806                 /* This string must fit in 10 chars (i.e. the length
807                  * of "/sbin/init"), to look pretty in /bin/ps */
808                 rename_process("(sd-pam)");
809
810                 /* Make sure we don't keep open the passed fds in this
811                 child. We assume that otherwise only those fds are
812                 open here that have been opened by PAM. */
813                 close_many(fds, n_fds);
814
815                 /* Drop privileges - we don't need any to pam_close_session
816                  * and this will make PR_SET_PDEATHSIG work in most cases.
817                  * If this fails, ignore the error - but expect sd-pam threads
818                  * to fail to exit normally */
819                 if (setresuid(uid, uid, uid) < 0)
820                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
821
822                 /* Wait until our parent died. This will only work if
823                  * the above setresuid() succeeds, otherwise the kernel
824                  * will not allow unprivileged parents kill their privileged
825                  * children this way. We rely on the control groups kill logic
826                  * to do the rest for us. */
827                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
828                         goto child_finish;
829
830                 /* Check if our parent process might already have
831                  * died? */
832                 if (getppid() == parent_pid) {
833                         for (;;) {
834                                 if (sigwait(&ss, &sig) < 0) {
835                                         if (errno == EINTR)
836                                                 continue;
837
838                                         goto child_finish;
839                                 }
840
841                                 assert(sig == SIGTERM);
842                                 break;
843                         }
844                 }
845
846                 /* If our parent died we'll end the session */
847                 if (getppid() != parent_pid)
848                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
849                                 goto child_finish;
850
851                 r = 0;
852
853         child_finish:
854                 pam_end(handle, pam_code | PAM_DATA_SILENT);
855                 _exit(r);
856         }
857
858         /* If the child was forked off successfully it will do all the
859          * cleanups, so forget about the handle here. */
860         handle = NULL;
861
862         /* Unblock SIGTERM again in the parent */
863         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
864                 goto fail;
865
866         /* We close the log explicitly here, since the PAM modules
867          * might have opened it, but we don't want this fd around. */
868         closelog();
869
870         *pam_env = e;
871         e = NULL;
872
873         return 0;
874
875 fail:
876         if (pam_code != PAM_SUCCESS)
877                 err = -EPERM;  /* PAM errors do not map to errno */
878         else
879                 err = -errno;
880
881         if (handle) {
882                 if (close_session)
883                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
884
885                 pam_end(handle, pam_code | PAM_DATA_SILENT);
886         }
887
888         strv_free(e);
889
890         closelog();
891
892         if (pam_pid > 1) {
893                 kill(pam_pid, SIGTERM);
894                 kill(pam_pid, SIGCONT);
895         }
896
897         return err;
898 }
899 #endif
900
/* Renames the process to the file name part of path, put in
 * parentheses. Only the last 8 characters of the file name are used,
 * so that the result is at most 10 characters long (i.e. the length
 * of "/sbin/init") and looks pretty in /bin/ps. If there is no file
 * name, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *name;
        size_t len;
        char *w;

        name = path_get_file_name(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        /* Keep the tail: the beginning is often just a common prefix
         * such as "systemd-" and hence less interesting. */
        len = strlen(name);
        if (len > 8) {
                name += len - 8;
                len = 8;
        }

        w = process_name;
        *w++ = '(';
        memcpy(w, name, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(process_name);
}
931
932 static int apply_seccomp(uint32_t *syscall_filter) {
933         static const struct sock_filter header[] = {
934                 VALIDATE_ARCHITECTURE,
935                 EXAMINE_SYSCALL
936         };
937         static const struct sock_filter footer[] = {
938                 _KILL_PROCESS
939         };
940
941         int i;
942         unsigned n;
943         struct sock_filter *f;
944         struct sock_fprog prog;
945
946         assert(syscall_filter);
947
948         /* First: count the syscalls to check for */
949         for (i = 0, n = 0; i < syscall_max(); i++)
950                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
951                         n++;
952
953         /* Second: build the filter program from a header the syscall
954          * matches and the footer */
955         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
956         memcpy(f, header, sizeof(header));
957
958         for (i = 0, n = 0; i < syscall_max(); i++)
959                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
960                         struct sock_filter item[] = {
961                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
962                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
963                         };
964
965                         assert_cc(ELEMENTSOF(item) == 2);
966
967                         f[ELEMENTSOF(header) + 2*n]  = item[0];
968                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
969
970                         n++;
971                 }
972
973         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
974
975         /* Third: install the filter */
976         zero(prog);
977         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
978         prog.filter = f;
979         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
980                 return -errno;
981
982         return 0;
983 }
984
985 int exec_spawn(ExecCommand *command,
986                char **argv,
987                const ExecContext *context,
988                int fds[], unsigned n_fds,
989                char **environment,
990                bool apply_permissions,
991                bool apply_chroot,
992                bool apply_tty_stdin,
993                bool confirm_spawn,
994                CGroupBonding *cgroup_bondings,
995                CGroupAttribute *cgroup_attributes,
996                const char *cgroup_suffix,
997                const char *unit_id,
998                int idle_pipe[2],
999                pid_t *ret) {
1000
1001         pid_t pid;
1002         int r;
1003         char *line;
1004         int socket_fd;
1005         char _cleanup_strv_free_ **files_env = NULL;
1006
1007         assert(command);
1008         assert(context);
1009         assert(ret);
1010         assert(fds || n_fds <= 0);
1011
1012         if (context->std_input == EXEC_INPUT_SOCKET ||
1013             context->std_output == EXEC_OUTPUT_SOCKET ||
1014             context->std_error == EXEC_OUTPUT_SOCKET) {
1015
1016                 if (n_fds != 1)
1017                         return -EINVAL;
1018
1019                 socket_fd = fds[0];
1020
1021                 fds = NULL;
1022                 n_fds = 0;
1023         } else
1024                 socket_fd = -1;
1025
1026         r = exec_context_load_environment(context, &files_env);
1027         if (r < 0) {
1028                 log_struct_unit(LOG_ERR,
1029                            unit_id,
1030                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1031                            "ERRNO=%d", -r,
1032                            NULL);
1033                 return r;
1034         }
1035
1036         if (!argv)
1037                 argv = command->argv;
1038
1039         line = exec_command_line(argv);
1040         if (!line)
1041                 return log_oom();
1042
1043         log_struct_unit(LOG_DEBUG,
1044                    unit_id,
1045                    "MESSAGE=About to execute %s", line,
1046                    NULL);
1047         free(line);
1048
1049         r = cgroup_bonding_realize_list(cgroup_bondings);
1050         if (r < 0)
1051                 return r;
1052
1053         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1054
1055         pid = fork();
1056         if (pid < 0)
1057                 return -errno;
1058
1059         if (pid == 0) {
1060                 int i, err;
1061                 sigset_t ss;
1062                 const char *username = NULL, *home = NULL;
1063                 uid_t uid = (uid_t) -1;
1064                 gid_t gid = (gid_t) -1;
1065                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1066                         **final_env = NULL, **final_argv = NULL;
1067                 unsigned n_env = 0;
1068                 bool set_access = false;
1069
1070                 /* child */
1071
1072                 rename_process_from_path(command->path);
1073
1074                 /* We reset exactly these signals, since they are the
1075                  * only ones we set to SIG_IGN in the main daemon. All
1076                  * others we leave untouched because we set them to
1077                  * SIG_DFL or a valid handler initially, both of which
1078                  * will be demoted to SIG_DFL. */
1079                 default_signals(SIGNALS_CRASH_HANDLER,
1080                                 SIGNALS_IGNORE, -1);
1081
1082                 if (context->ignore_sigpipe)
1083                         ignore_signals(SIGPIPE, -1);
1084
1085                 assert_se(sigemptyset(&ss) == 0);
1086                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1087                         err = -errno;
1088                         r = EXIT_SIGNAL_MASK;
1089                         goto fail_child;
1090                 }
1091
1092                 if (idle_pipe) {
1093                         if (idle_pipe[1] >= 0)
1094                                 close_nointr_nofail(idle_pipe[1]);
1095                         if (idle_pipe[0] >= 0) {
1096                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1097                                 close_nointr_nofail(idle_pipe[0]);
1098                         }
1099                 }
1100
1101                 /* Close sockets very early to make sure we don't
1102                  * block init reexecution because it cannot bind its
1103                  * sockets */
1104                 log_forget_fds();
1105                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1106                                            socket_fd >= 0 ? 1 : n_fds);
1107                 if (err < 0) {
1108                         r = EXIT_FDS;
1109                         goto fail_child;
1110                 }
1111
1112                 if (!context->same_pgrp)
1113                         if (setsid() < 0) {
1114                                 err = -errno;
1115                                 r = EXIT_SETSID;
1116                                 goto fail_child;
1117                         }
1118
1119                 if (context->tcpwrap_name) {
1120                         if (socket_fd >= 0)
1121                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1122                                         err = -EACCES;
1123                                         r = EXIT_TCPWRAP;
1124                                         goto fail_child;
1125                                 }
1126
1127                         for (i = 0; i < (int) n_fds; i++) {
1128                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1129                                         err = -EACCES;
1130                                         r = EXIT_TCPWRAP;
1131                                         goto fail_child;
1132                                 }
1133                         }
1134                 }
1135
1136                 exec_context_tty_reset(context);
1137
1138                 if (confirm_spawn) {
1139                         char response;
1140
1141                         err = ask_for_confirmation(&response, argv);
1142                         if (err == -ETIMEDOUT)
1143                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1144                         else if (err < 0)
1145                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1146                         else if (response == 's') {
1147                                 write_confirm_message("Skipping execution.\n");
1148                                 err = -ECANCELED;
1149                                 r = EXIT_CONFIRM;
1150                                 goto fail_child;
1151                         } else if (response == 'n') {
1152                                 write_confirm_message("Failing execution.\n");
1153                                 err = r = 0;
1154                                 goto fail_child;
1155                         }
1156                 }
1157
1158                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1159                  * must sure to drop O_NONBLOCK */
1160                 if (socket_fd >= 0)
1161                         fd_nonblock(socket_fd, false);
1162
1163                 err = setup_input(context, socket_fd, apply_tty_stdin);
1164                 if (err < 0) {
1165                         r = EXIT_STDIN;
1166                         goto fail_child;
1167                 }
1168
1169                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1170                 if (err < 0) {
1171                         r = EXIT_STDOUT;
1172                         goto fail_child;
1173                 }
1174
1175                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1176                 if (err < 0) {
1177                         r = EXIT_STDERR;
1178                         goto fail_child;
1179                 }
1180
1181                 if (cgroup_bondings) {
1182                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1183                         if (err < 0) {
1184                                 r = EXIT_CGROUP;
1185                                 goto fail_child;
1186                         }
1187                 }
1188
1189                 if (context->oom_score_adjust_set) {
1190                         char t[16];
1191
1192                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1193                         char_array_0(t);
1194
1195                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1196                                 err = -errno;
1197                                 r = EXIT_OOM_ADJUST;
1198                                 goto fail_child;
1199                         }
1200                 }
1201
1202                 if (context->nice_set)
1203                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1204                                 err = -errno;
1205                                 r = EXIT_NICE;
1206                                 goto fail_child;
1207                         }
1208
1209                 if (context->cpu_sched_set) {
1210                         struct sched_param param;
1211
1212                         zero(param);
1213                         param.sched_priority = context->cpu_sched_priority;
1214
1215                         if (sched_setscheduler(0, context->cpu_sched_policy |
1216                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1217                                 err = -errno;
1218                                 r = EXIT_SETSCHEDULER;
1219                                 goto fail_child;
1220                         }
1221                 }
1222
1223                 if (context->cpuset)
1224                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1225                                 err = -errno;
1226                                 r = EXIT_CPUAFFINITY;
1227                                 goto fail_child;
1228                         }
1229
1230                 if (context->ioprio_set)
1231                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1232                                 err = -errno;
1233                                 r = EXIT_IOPRIO;
1234                                 goto fail_child;
1235                         }
1236
1237                 if (context->timer_slack_nsec != (nsec_t) -1)
1238                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1239                                 err = -errno;
1240                                 r = EXIT_TIMERSLACK;
1241                                 goto fail_child;
1242                         }
1243
1244                 if (context->utmp_id)
1245                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1246
1247                 if (context->user) {
1248                         username = context->user;
1249                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1250                         if (err < 0) {
1251                                 r = EXIT_USER;
1252                                 goto fail_child;
1253                         }
1254
1255                         if (is_terminal_input(context->std_input)) {
1256                                 err = chown_terminal(STDIN_FILENO, uid);
1257                                 if (err < 0) {
1258                                         r = EXIT_STDIN;
1259                                         goto fail_child;
1260                                 }
1261                         }
1262
1263                         if (cgroup_bondings && context->control_group_modify) {
1264                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1265                                 if (err >= 0)
1266                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1267                                 if (err < 0) {
1268                                         r = EXIT_CGROUP;
1269                                         goto fail_child;
1270                                 }
1271
1272                                 set_access = true;
1273                         }
1274                 }
1275
1276                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1277                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1278                         if (err < 0) {
1279                                 r = EXIT_CGROUP;
1280                                 goto fail_child;
1281                         }
1282                 }
1283
1284                 if (apply_permissions) {
1285                         err = enforce_groups(context, username, gid);
1286                         if (err < 0) {
1287                                 r = EXIT_GROUP;
1288                                 goto fail_child;
1289                         }
1290                 }
1291
1292                 umask(context->umask);
1293
1294 #ifdef HAVE_PAM
1295                 if (apply_permissions && context->pam_name && username) {
1296                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1297                         if (err < 0) {
1298                                 r = EXIT_PAM;
1299                                 goto fail_child;
1300                         }
1301                 }
1302 #endif
1303                 if (context->private_network) {
1304                         if (unshare(CLONE_NEWNET) < 0) {
1305                                 err = -errno;
1306                                 r = EXIT_NETWORK;
1307                                 goto fail_child;
1308                         }
1309
1310                         loopback_setup();
1311                 }
1312
1313                 if (strv_length(context->read_write_dirs) > 0 ||
1314                     strv_length(context->read_only_dirs) > 0 ||
1315                     strv_length(context->inaccessible_dirs) > 0 ||
1316                     context->mount_flags != 0 ||
1317                     context->private_tmp) {
1318                         err = setup_namespace(context->read_write_dirs,
1319                                               context->read_only_dirs,
1320                                               context->inaccessible_dirs,
1321                                               context->private_tmp,
1322                                               context->mount_flags);
1323                         if (err < 0) {
1324                                 r = EXIT_NAMESPACE;
1325                                 goto fail_child;
1326                         }
1327                 }
1328
1329                 if (apply_chroot) {
1330                         if (context->root_directory)
1331                                 if (chroot(context->root_directory) < 0) {
1332                                         err = -errno;
1333                                         r = EXIT_CHROOT;
1334                                         goto fail_child;
1335                                 }
1336
1337                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1338                                 err = -errno;
1339                                 r = EXIT_CHDIR;
1340                                 goto fail_child;
1341                         }
1342                 } else {
1343                         char _cleanup_free_ *d = NULL;
1344
1345                         if (asprintf(&d, "%s/%s",
1346                                      context->root_directory ? context->root_directory : "",
1347                                      context->working_directory ? context->working_directory : "") < 0) {
1348                                 err = -ENOMEM;
1349                                 r = EXIT_MEMORY;
1350                                 goto fail_child;
1351                         }
1352
1353                         if (chdir(d) < 0) {
1354                                 err = -errno;
1355                                 r = EXIT_CHDIR;
1356                                 goto fail_child;
1357                         }
1358                 }
1359
1360                 /* We repeat the fd closing here, to make sure that
1361                  * nothing is leaked from the PAM modules */
1362                 err = close_all_fds(fds, n_fds);
1363                 if (err >= 0)
1364                         err = shift_fds(fds, n_fds);
1365                 if (err >= 0)
1366                         err = flags_fds(fds, n_fds, context->non_blocking);
1367                 if (err < 0) {
1368                         r = EXIT_FDS;
1369                         goto fail_child;
1370                 }
1371
1372                 if (apply_permissions) {
1373
1374                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1375                                 if (!context->rlimit[i])
1376                                         continue;
1377
1378                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1379                                         err = -errno;
1380                                         r = EXIT_LIMITS;
1381                                         goto fail_child;
1382                                 }
1383                         }
1384
1385                         if (context->capability_bounding_set_drop) {
1386                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1387                                 if (err < 0) {
1388                                         r = EXIT_CAPABILITIES;
1389                                         goto fail_child;
1390                                 }
1391                         }
1392
1393                         if (context->user) {
1394                                 err = enforce_user(context, uid);
1395                                 if (err < 0) {
1396                                         r = EXIT_USER;
1397                                         goto fail_child;
1398                                 }
1399                         }
1400
1401                         /* PR_GET_SECUREBITS is not privileged, while
1402                          * PR_SET_SECUREBITS is. So to suppress
1403                          * potential EPERMs we'll try not to call
1404                          * PR_SET_SECUREBITS unless necessary. */
1405                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1406                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1407                                         err = -errno;
1408                                         r = EXIT_SECUREBITS;
1409                                         goto fail_child;
1410                                 }
1411
1412                         if (context->capabilities)
1413                                 if (cap_set_proc(context->capabilities) < 0) {
1414                                         err = -errno;
1415                                         r = EXIT_CAPABILITIES;
1416                                         goto fail_child;
1417                                 }
1418
1419                         if (context->no_new_privileges)
1420                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1421                                         err = -errno;
1422                                         r = EXIT_NO_NEW_PRIVILEGES;
1423                                         goto fail_child;
1424                                 }
1425
1426                         if (context->syscall_filter) {
1427                                 err = apply_seccomp(context->syscall_filter);
1428                                 if (err < 0) {
1429                                         r = EXIT_SECCOMP;
1430                                         goto fail_child;
1431                                 }
1432                         }
1433                 }
1434
1435                 if (!(our_env = new0(char*, 7))) {
1436                         err = -ENOMEM;
1437                         r = EXIT_MEMORY;
1438                         goto fail_child;
1439                 }
1440
1441                 if (n_fds > 0)
1442                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1443                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1444                                 err = -ENOMEM;
1445                                 r = EXIT_MEMORY;
1446                                 goto fail_child;
1447                         }
1448
1449                 if (home)
1450                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1451                                 err = -ENOMEM;
1452                                 r = EXIT_MEMORY;
1453                                 goto fail_child;
1454                         }
1455
1456                 if (username)
1457                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1458                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1459                                 err = -ENOMEM;
1460                                 r = EXIT_MEMORY;
1461                                 goto fail_child;
1462                         }
1463
1464                 if (is_terminal_input(context->std_input) ||
1465                     context->std_output == EXEC_OUTPUT_TTY ||
1466                     context->std_error == EXEC_OUTPUT_TTY)
1467                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1468                                 err = -ENOMEM;
1469                                 r = EXIT_MEMORY;
1470                                 goto fail_child;
1471                         }
1472
1473                 assert(n_env <= 7);
1474
1475                 if (!(final_env = strv_env_merge(
1476                                       5,
1477                                       environment,
1478                                       our_env,
1479                                       context->environment,
1480                                       files_env,
1481                                       pam_env,
1482                                       NULL))) {
1483                         err = -ENOMEM;
1484                         r = EXIT_MEMORY;
1485                         goto fail_child;
1486                 }
1487
1488                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1489                         err = -ENOMEM;
1490                         r = EXIT_MEMORY;
1491                         goto fail_child;
1492                 }
1493
1494                 final_env = strv_env_clean(final_env);
1495
1496                 execve(command->path, final_argv, final_env);
1497                 err = -errno;
1498                 r = EXIT_EXEC;
1499
1500         fail_child:
1501                 if (r != 0) {
1502                         log_open();
1503                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1504                                    "EXECUTABLE=%s", command->path,
1505                                    "MESSAGE=Failed at step %s spawning %s: %s",
1506                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1507                                           command->path, strerror(-err),
1508                                    "ERRNO=%d", -err,
1509                                    NULL);
1510                         log_close();
1511                 }
1512
1513                 _exit(r);
1514         }
1515
1516         log_struct_unit(LOG_DEBUG,
1517                    unit_id,
1518                    "MESSAGE=Forked %s as %lu",
1519                           command->path, (unsigned long) pid,
1520                    NULL);
1521
1522         /* We add the new process to the cgroup both in the child (so
1523          * that we can be sure that no user code is ever executed
1524          * outside of the cgroup) and in the parent (so that we can be
1525          * sure that when we kill the cgroup the process will be
1526          * killed too). */
1527         if (cgroup_bondings)
1528                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1529
1530         exec_status_start(&command->exec_status, pid);
1531
1532         *ret = pid;
1533         return 0;
1534 }
1535
1536 void exec_context_init(ExecContext *c) {
1537         assert(c);
1538
1539         c->umask = 0022;
1540         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1541         c->cpu_sched_policy = SCHED_OTHER;
1542         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1543         c->syslog_level_prefix = true;
1544         c->control_group_persistent = -1;
1545         c->ignore_sigpipe = true;
1546         c->timer_slack_nsec = (nsec_t) -1;
1547 }
1548
1549 void exec_context_done(ExecContext *c) {
1550         unsigned l;
1551
1552         assert(c);
1553
1554         strv_free(c->environment);
1555         c->environment = NULL;
1556
1557         strv_free(c->environment_files);
1558         c->environment_files = NULL;
1559
1560         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1561                 free(c->rlimit[l]);
1562                 c->rlimit[l] = NULL;
1563         }
1564
1565         free(c->working_directory);
1566         c->working_directory = NULL;
1567         free(c->root_directory);
1568         c->root_directory = NULL;
1569
1570         free(c->tty_path);
1571         c->tty_path = NULL;
1572
1573         free(c->tcpwrap_name);
1574         c->tcpwrap_name = NULL;
1575
1576         free(c->syslog_identifier);
1577         c->syslog_identifier = NULL;
1578
1579         free(c->user);
1580         c->user = NULL;
1581
1582         free(c->group);
1583         c->group = NULL;
1584
1585         strv_free(c->supplementary_groups);
1586         c->supplementary_groups = NULL;
1587
1588         free(c->pam_name);
1589         c->pam_name = NULL;
1590
1591         if (c->capabilities) {
1592                 cap_free(c->capabilities);
1593                 c->capabilities = NULL;
1594         }
1595
1596         strv_free(c->read_only_dirs);
1597         c->read_only_dirs = NULL;
1598
1599         strv_free(c->read_write_dirs);
1600         c->read_write_dirs = NULL;
1601
1602         strv_free(c->inaccessible_dirs);
1603         c->inaccessible_dirs = NULL;
1604
1605         if (c->cpuset)
1606                 CPU_FREE(c->cpuset);
1607
1608         free(c->utmp_id);
1609         c->utmp_id = NULL;
1610
1611         free(c->syscall_filter);
1612         c->syscall_filter = NULL;
1613 }
1614
1615 void exec_command_done(ExecCommand *c) {
1616         assert(c);
1617
1618         free(c->path);
1619         c->path = NULL;
1620
1621         strv_free(c->argv);
1622         c->argv = NULL;
1623 }
1624
1625 void exec_command_done_array(ExecCommand *c, unsigned n) {
1626         unsigned i;
1627
1628         for (i = 0; i < n; i++)
1629                 exec_command_done(c+i);
1630 }
1631
1632 void exec_command_free_list(ExecCommand *c) {
1633         ExecCommand *i;
1634
1635         while ((i = c)) {
1636                 LIST_REMOVE(ExecCommand, command, c, i);
1637                 exec_command_done(i);
1638                 free(i);
1639         }
1640 }
1641
1642 void exec_command_free_array(ExecCommand **c, unsigned n) {
1643         unsigned i;
1644
1645         for (i = 0; i < n; i++) {
1646                 exec_command_free_list(c[i]);
1647                 c[i] = NULL;
1648         }
1649 }
1650
1651 int exec_context_load_environment(const ExecContext *c, char ***l) {
1652         char **i, **r = NULL;
1653
1654         assert(c);
1655         assert(l);
1656
1657         STRV_FOREACH(i, c->environment_files) {
1658                 char *fn;
1659                 int k;
1660                 bool ignore = false;
1661                 char **p;
1662                 glob_t pglob;
1663                 int count, n;
1664
1665                 fn = *i;
1666
1667                 if (fn[0] == '-') {
1668                         ignore = true;
1669                         fn ++;
1670                 }
1671
1672                 if (!path_is_absolute(fn)) {
1673
1674                         if (ignore)
1675                                 continue;
1676
1677                         strv_free(r);
1678                         return -EINVAL;
1679                 }
1680
1681                 /* Filename supports globbing, take all matching files */
1682                 zero(pglob);
1683                 errno = 0;
1684                 if (glob(fn, 0, NULL, &pglob) != 0) {
1685                         globfree(&pglob);
1686                         if (ignore)
1687                                 continue;
1688
1689                         strv_free(r);
1690                         return errno ? -errno : -EINVAL;
1691                 }
1692                 count = pglob.gl_pathc;
1693                 if (count == 0) {
1694                         globfree(&pglob);
1695                         if (ignore)
1696                                 continue;
1697
1698                         strv_free(r);
1699                         return -EINVAL;
1700                 }
1701                 for (n = 0; n < count; n++) {
1702                         k = load_env_file(pglob.gl_pathv[n], &p);
1703                         if (k < 0) {
1704                                 if (ignore)
1705                                         continue;
1706
1707                                 strv_free(r);
1708                                 globfree(&pglob);
1709                                 return k;
1710                          }
1711
1712                         if (r == NULL)
1713                                 r = p;
1714                         else {
1715                                 char **m;
1716
1717                                 m = strv_env_merge(2, r, p);
1718                                 strv_free(r);
1719                                 strv_free(p);
1720
1721                                 if (!m) {
1722                                         globfree(&pglob);
1723                                         return -ENOMEM;
1724                                 }
1725
1726                                 r = m;
1727                         }
1728                 }
1729                 globfree(&pglob);
1730         }
1731
1732         *l = r;
1733
1734         return 0;
1735 }
1736
1737 static void strv_fprintf(FILE *f, char **l) {
1738         char **g;
1739
1740         assert(f);
1741
1742         STRV_FOREACH(g, l)
1743                 fprintf(f, " %s", *g);
1744 }
1745
1746 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1747         char ** e;
1748         unsigned i;
1749
1750         assert(c);
1751         assert(f);
1752
1753         if (!prefix)
1754                 prefix = "";
1755
1756         fprintf(f,
1757                 "%sUMask: %04o\n"
1758                 "%sWorkingDirectory: %s\n"
1759                 "%sRootDirectory: %s\n"
1760                 "%sNonBlocking: %s\n"
1761                 "%sPrivateTmp: %s\n"
1762                 "%sControlGroupModify: %s\n"
1763                 "%sControlGroupPersistent: %s\n"
1764                 "%sPrivateNetwork: %s\n"
1765                 "%sIgnoreSIGPIPE: %s\n",
1766                 prefix, c->umask,
1767                 prefix, c->working_directory ? c->working_directory : "/",
1768                 prefix, c->root_directory ? c->root_directory : "/",
1769                 prefix, yes_no(c->non_blocking),
1770                 prefix, yes_no(c->private_tmp),
1771                 prefix, yes_no(c->control_group_modify),
1772                 prefix, yes_no(c->control_group_persistent),
1773                 prefix, yes_no(c->private_network),
1774                 prefix, yes_no(c->ignore_sigpipe));
1775
1776         STRV_FOREACH(e, c->environment)
1777                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1778
1779         STRV_FOREACH(e, c->environment_files)
1780                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1781
1782         if (c->tcpwrap_name)
1783                 fprintf(f,
1784                         "%sTCPWrapName: %s\n",
1785                         prefix, c->tcpwrap_name);
1786
1787         if (c->nice_set)
1788                 fprintf(f,
1789                         "%sNice: %i\n",
1790                         prefix, c->nice);
1791
1792         if (c->oom_score_adjust_set)
1793                 fprintf(f,
1794                         "%sOOMScoreAdjust: %i\n",
1795                         prefix, c->oom_score_adjust);
1796
1797         for (i = 0; i < RLIM_NLIMITS; i++)
1798                 if (c->rlimit[i])
1799                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1800
1801         if (c->ioprio_set) {
1802                 char *class_str;
1803                 int r;
1804
1805                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1806                 if (r < 0)
1807                         class_str = NULL;
1808                 fprintf(f,
1809                         "%sIOSchedulingClass: %s\n"
1810                         "%sIOPriority: %i\n",
1811                         prefix, strna(class_str),
1812                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1813                 free(class_str);
1814         }
1815
1816         if (c->cpu_sched_set) {
1817                 char *policy_str;
1818                 int r;
1819
1820                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1821                 if (r < 0)
1822                         policy_str = NULL;
1823                 fprintf(f,
1824                         "%sCPUSchedulingPolicy: %s\n"
1825                         "%sCPUSchedulingPriority: %i\n"
1826                         "%sCPUSchedulingResetOnFork: %s\n",
1827                         prefix, strna(policy_str),
1828                         prefix, c->cpu_sched_priority,
1829                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1830                 free(policy_str);
1831         }
1832
1833         if (c->cpuset) {
1834                 fprintf(f, "%sCPUAffinity:", prefix);
1835                 for (i = 0; i < c->cpuset_ncpus; i++)
1836                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1837                                 fprintf(f, " %i", i);
1838                 fputs("\n", f);
1839         }
1840
1841         if (c->timer_slack_nsec != (nsec_t) -1)
1842                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1843
1844         fprintf(f,
1845                 "%sStandardInput: %s\n"
1846                 "%sStandardOutput: %s\n"
1847                 "%sStandardError: %s\n",
1848                 prefix, exec_input_to_string(c->std_input),
1849                 prefix, exec_output_to_string(c->std_output),
1850                 prefix, exec_output_to_string(c->std_error));
1851
1852         if (c->tty_path)
1853                 fprintf(f,
1854                         "%sTTYPath: %s\n"
1855                         "%sTTYReset: %s\n"
1856                         "%sTTYVHangup: %s\n"
1857                         "%sTTYVTDisallocate: %s\n",
1858                         prefix, c->tty_path,
1859                         prefix, yes_no(c->tty_reset),
1860                         prefix, yes_no(c->tty_vhangup),
1861                         prefix, yes_no(c->tty_vt_disallocate));
1862
1863         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1864             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1865             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1866             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1867                 char *fac_str, *lvl_str;
1868                 int r;
1869
1870                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1871                 if (r < 0)
1872                         fac_str = NULL;
1873
1874                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1875                 if (r < 0)
1876                         lvl_str = NULL;
1877
1878                 fprintf(f,
1879                         "%sSyslogFacility: %s\n"
1880                         "%sSyslogLevel: %s\n",
1881                         prefix, strna(fac_str),
1882                         prefix, strna(lvl_str));
1883                 free(lvl_str);
1884                 free(fac_str);
1885         }
1886
1887         if (c->capabilities) {
1888                 char *t;
1889                 if ((t = cap_to_text(c->capabilities, NULL))) {
1890                         fprintf(f, "%sCapabilities: %s\n",
1891                                 prefix, t);
1892                         cap_free(t);
1893                 }
1894         }
1895
1896         if (c->secure_bits)
1897                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1898                         prefix,
1899                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1900                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1901                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1902                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1903                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1904                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1905
1906         if (c->capability_bounding_set_drop) {
1907                 unsigned long l;
1908                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1909
1910                 for (l = 0; l <= cap_last_cap(); l++)
1911                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1912                                 char *t;
1913
1914                                 if ((t = cap_to_name(l))) {
1915                                         fprintf(f, " %s", t);
1916                                         cap_free(t);
1917                                 }
1918                         }
1919
1920                 fputs("\n", f);
1921         }
1922
1923         if (c->user)
1924                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1925         if (c->group)
1926                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1927
1928         if (strv_length(c->supplementary_groups) > 0) {
1929                 fprintf(f, "%sSupplementaryGroups:", prefix);
1930                 strv_fprintf(f, c->supplementary_groups);
1931                 fputs("\n", f);
1932         }
1933
1934         if (c->pam_name)
1935                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1936
1937         if (strv_length(c->read_write_dirs) > 0) {
1938                 fprintf(f, "%sReadWriteDirs:", prefix);
1939                 strv_fprintf(f, c->read_write_dirs);
1940                 fputs("\n", f);
1941         }
1942
1943         if (strv_length(c->read_only_dirs) > 0) {
1944                 fprintf(f, "%sReadOnlyDirs:", prefix);
1945                 strv_fprintf(f, c->read_only_dirs);
1946                 fputs("\n", f);
1947         }
1948
1949         if (strv_length(c->inaccessible_dirs) > 0) {
1950                 fprintf(f, "%sInaccessibleDirs:", prefix);
1951                 strv_fprintf(f, c->inaccessible_dirs);
1952                 fputs("\n", f);
1953         }
1954
1955         if (c->utmp_id)
1956                 fprintf(f,
1957                         "%sUtmpIdentifier: %s\n",
1958                         prefix, c->utmp_id);
1959 }
1960
1961 void exec_status_start(ExecStatus *s, pid_t pid) {
1962         assert(s);
1963
1964         zero(*s);
1965         s->pid = pid;
1966         dual_timestamp_get(&s->start_timestamp);
1967 }
1968
1969 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1970         assert(s);
1971
1972         if (s->pid && s->pid != pid)
1973                 zero(*s);
1974
1975         s->pid = pid;
1976         dual_timestamp_get(&s->exit_timestamp);
1977
1978         s->code = code;
1979         s->status = status;
1980
1981         if (context) {
1982                 if (context->utmp_id)
1983                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1984
1985                 exec_context_tty_reset(context);
1986         }
1987 }
1988
1989 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1990         char buf[FORMAT_TIMESTAMP_MAX];
1991
1992         assert(s);
1993         assert(f);
1994
1995         if (!prefix)
1996                 prefix = "";
1997
1998         if (s->pid <= 0)
1999                 return;
2000
2001         fprintf(f,
2002                 "%sPID: %lu\n",
2003                 prefix, (unsigned long) s->pid);
2004
2005         if (s->start_timestamp.realtime > 0)
2006                 fprintf(f,
2007                         "%sStart Timestamp: %s\n",
2008                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2009
2010         if (s->exit_timestamp.realtime > 0)
2011                 fprintf(f,
2012                         "%sExit Timestamp: %s\n"
2013                         "%sExit Code: %s\n"
2014                         "%sExit Status: %i\n",
2015                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2016                         prefix, sigchld_code_to_string(s->code),
2017                         prefix, s->status);
2018 }
2019
2020 char *exec_command_line(char **argv) {
2021         size_t k;
2022         char *n, *p, **a;
2023         bool first = true;
2024
2025         assert(argv);
2026
2027         k = 1;
2028         STRV_FOREACH(a, argv)
2029                 k += strlen(*a)+3;
2030
2031         if (!(n = new(char, k)))
2032                 return NULL;
2033
2034         p = n;
2035         STRV_FOREACH(a, argv) {
2036
2037                 if (!first)
2038                         *(p++) = ' ';
2039                 else
2040                         first = false;
2041
2042                 if (strpbrk(*a, WHITESPACE)) {
2043                         *(p++) = '\'';
2044                         p = stpcpy(p, *a);
2045                         *(p++) = '\'';
2046                 } else
2047                         p = stpcpy(p, *a);
2048
2049         }
2050
2051         *p = 0;
2052
2053         /* FIXME: this doesn't really handle arguments that have
2054          * spaces and ticks in them */
2055
2056         return n;
2057 }
2058
2059 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2060         char *p2;
2061         const char *prefix2;
2062
2063         char *cmd;
2064
2065         assert(c);
2066         assert(f);
2067
2068         if (!prefix)
2069                 prefix = "";
2070         p2 = strappend(prefix, "\t");
2071         prefix2 = p2 ? p2 : prefix;
2072
2073         cmd = exec_command_line(c->argv);
2074
2075         fprintf(f,
2076                 "%sCommand Line: %s\n",
2077                 prefix, cmd ? cmd : strerror(ENOMEM));
2078
2079         free(cmd);
2080
2081         exec_status_dump(&c->exec_status, f, prefix2);
2082
2083         free(p2);
2084 }
2085
2086 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2087         assert(f);
2088
2089         if (!prefix)
2090                 prefix = "";
2091
2092         LIST_FOREACH(command, c, c)
2093                 exec_command_dump(c, f, prefix);
2094 }
2095
2096 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2097         ExecCommand *end;
2098
2099         assert(l);
2100         assert(e);
2101
2102         if (*l) {
2103                 /* It's kind of important, that we keep the order here */
2104                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2105                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2106         } else
2107               *l = e;
2108 }
2109
2110 int exec_command_set(ExecCommand *c, const char *path, ...) {
2111         va_list ap;
2112         char **l, *p;
2113
2114         assert(c);
2115         assert(path);
2116
2117         va_start(ap, path);
2118         l = strv_new_ap(path, ap);
2119         va_end(ap);
2120
2121         if (!l)
2122                 return -ENOMEM;
2123
2124         if (!(p = strdup(path))) {
2125                 strv_free(l);
2126                 return -ENOMEM;
2127         }
2128
2129         free(c->path);
2130         c->path = p;
2131
2132         strv_free(c->argv);
2133         c->argv = l;
2134
2135         return 0;
2136 }
2137
2138 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2139         [EXEC_INPUT_NULL] = "null",
2140         [EXEC_INPUT_TTY] = "tty",
2141         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2142         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2143         [EXEC_INPUT_SOCKET] = "socket"
2144 };
2145
2146 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2147
2148 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2149         [EXEC_OUTPUT_INHERIT] = "inherit",
2150         [EXEC_OUTPUT_NULL] = "null",
2151         [EXEC_OUTPUT_TTY] = "tty",
2152         [EXEC_OUTPUT_SYSLOG] = "syslog",
2153         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2154         [EXEC_OUTPUT_KMSG] = "kmsg",
2155         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2156         [EXEC_OUTPUT_JOURNAL] = "journal",
2157         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2158         [EXEC_OUTPUT_SOCKET] = "socket"
2159 };
2160
2161 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);