chiark / gitweb /
shared, core: do not always accept numbers in string lookups
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42
43 #ifdef HAVE_PAM
44 #include <security/pam_appl.h>
45 #endif
46
47 #include "execute.h"
48 #include "strv.h"
49 #include "macro.h"
50 #include "capability.h"
51 #include "util.h"
52 #include "log.h"
53 #include "ioprio.h"
54 #include "securebits.h"
55 #include "cgroup.h"
56 #include "namespace.h"
57 #include "tcpwrap.h"
58 #include "exit-status.h"
59 #include "missing.h"
60 #include "utmp-wtmp.h"
61 #include "def.h"
62 #include "loopback-setup.h"
63 #include "path-util.h"
64 #include "syscall-list.h"
65 #include "sd-id128.h"
66 #include "sd-messages.h"
67
68 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
69
70 /* This assumes there is a 'tty' group */
71 #define TTY_MODE 0620
72
/* Moves the passed file descriptors so that fds[k] ends up being
 * descriptor number k+3, i.e. they are packed right after
 * stdin/stdout/stderr. The array is updated in place with the new
 * descriptor numbers. Returns 0 on success, or a negative errno if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        /* Note: the contents of the fds array are rewritten! */

        assert(fds);

        do {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted.
                         * If the wanted slot was still occupied, remember
                         * the first such index and do another pass
                         * starting there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
116
/* Applies the requested O_NONBLOCK setting to every descriptor in
 * fds[] and clears FD_CLOEXEC on each of them. Stops at the first
 * failure and returns that helper's (negative) result; returns 0
 * otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int q;

                q = fd_nonblock(fds[k], nonblock);
                if (q < 0)
                        return q;

                /* FD_CLOEXEC is always dropped: these descriptors
                 * are meant to be handed over to the child. */
                q = fd_cloexec(fds[k], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
143
144 static const char *tty_path(const ExecContext *context) {
145         assert(context);
146
147         if (context->tty_path)
148                 return context->tty_path;
149
150         return "/dev/console";
151 }
152
153 void exec_context_tty_reset(const ExecContext *context) {
154         assert(context);
155
156         if (context->tty_vhangup)
157                 terminal_vhangup(tty_path(context));
158
159         if (context->tty_reset)
160                 reset_terminal(tty_path(context));
161
162         if (context->tty_vt_disallocate && context->tty_path)
163                 vt_disallocate(context->tty_path);
164 }
165
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative
 * errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: we already got the descriptor we wanted */
        if (null_fd == nfd)
                return nfd;

        ret = nfd;
        if (dup2(null_fd, nfd) < 0)
                ret = -errno;

        close_nointr_nofail(null_fd);

        return ret;
}
182
183 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
184         int fd, r;
185         union sockaddr_union sa;
186
187         assert(context);
188         assert(output < _EXEC_OUTPUT_MAX);
189         assert(ident);
190         assert(nfd >= 0);
191
192         fd = socket(AF_UNIX, SOCK_STREAM, 0);
193         if (fd < 0)
194                 return -errno;
195
196         zero(sa);
197         sa.un.sun_family = AF_UNIX;
198         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
199
200         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
201         if (r < 0) {
202                 close_nointr_nofail(fd);
203                 return -errno;
204         }
205
206         if (shutdown(fd, SHUT_RD) < 0) {
207                 close_nointr_nofail(fd);
208                 return -errno;
209         }
210
211         dprintf(fd,
212                 "%s\n"
213                 "%s\n"
214                 "%i\n"
215                 "%i\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n",
219                 context->syslog_identifier ? context->syslog_identifier : ident,
220                 unit_id,
221                 context->syslog_priority,
222                 !!context->syslog_level_prefix,
223                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
224                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
225                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
226
227         if (fd != nfd) {
228                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
229                 close_nointr_nofail(fd);
230         } else
231                 r = nfd;
232
233         return r;
234 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, or a
 * negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already on the right descriptor? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        ret = nfd;
        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;

        close_nointr_nofail(tty_fd);

        return ret;
}
252
253 static bool is_terminal_input(ExecInput i) {
254         return
255                 i == EXEC_INPUT_TTY ||
256                 i == EXEC_INPUT_TTY_FORCE ||
257                 i == EXEC_INPUT_TTY_FAIL;
258 }
259
260 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
261
262         if (is_terminal_input(std_input) && !apply_tty_stdin)
263                 return EXEC_INPUT_NULL;
264
265         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
266                 return EXEC_INPUT_NULL;
267
268         return std_input;
269 }
270
271 static int fixup_output(ExecOutput std_output, int socket_fd) {
272
273         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
274                 return EXEC_OUTPUT_INHERIT;
275
276         return std_output;
277 }
278
279 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
280         ExecInput i;
281
282         assert(context);
283
284         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
285
286         switch (i) {
287
288         case EXEC_INPUT_NULL:
289                 return open_null_as(O_RDONLY, STDIN_FILENO);
290
291         case EXEC_INPUT_TTY:
292         case EXEC_INPUT_TTY_FORCE:
293         case EXEC_INPUT_TTY_FAIL: {
294                 int fd, r;
295
296                 if ((fd = acquire_terminal(
297                                      tty_path(context),
298                                      i == EXEC_INPUT_TTY_FAIL,
299                                      i == EXEC_INPUT_TTY_FORCE,
300                                      false,
301                                      (usec_t) -1)) < 0)
302                         return fd;
303
304                 if (fd != STDIN_FILENO) {
305                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306                         close_nointr_nofail(fd);
307                 } else
308                         r = STDIN_FILENO;
309
310                 return r;
311         }
312
313         case EXEC_INPUT_SOCKET:
314                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
315
316         default:
317                 assert_not_reached("Unknown input type");
318         }
319 }
320
321 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
322         ExecOutput o;
323         ExecInput i;
324
325         assert(context);
326         assert(ident);
327
328         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
329         o = fixup_output(context->std_output, socket_fd);
330
331         /* This expects the input is already set up */
332
333         switch (o) {
334
335         case EXEC_OUTPUT_INHERIT:
336
337                 /* If input got downgraded, inherit the original value */
338                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
339                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
340
341                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
342                 if (i != EXEC_INPUT_NULL)
343                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
344
345                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
346                 if (getppid() != 1)
347                         return STDOUT_FILENO;
348
349                 /* We need to open /dev/null here anew, to get the
350                  * right access mode. So we fall through */
351
352         case EXEC_OUTPUT_NULL:
353                 return open_null_as(O_WRONLY, STDOUT_FILENO);
354
355         case EXEC_OUTPUT_TTY:
356                 if (is_terminal_input(i))
357                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
358
359                 /* We don't reset the terminal if this is just about output */
360                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
361
362         case EXEC_OUTPUT_SYSLOG:
363         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
364         case EXEC_OUTPUT_KMSG:
365         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
366         case EXEC_OUTPUT_JOURNAL:
367         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
368                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
369
370         case EXEC_OUTPUT_SOCKET:
371                 assert(socket_fd >= 0);
372                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
373
374         default:
375                 assert_not_reached("Unknown output type");
376         }
377 }
378
379 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
380         ExecOutput o, e;
381         ExecInput i;
382
383         assert(context);
384         assert(ident);
385
386         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
387         o = fixup_output(context->std_output, socket_fd);
388         e = fixup_output(context->std_error, socket_fd);
389
390         /* This expects the input and output are already set up */
391
392         /* Don't change the stderr file descriptor if we inherit all
393          * the way and are not on a tty */
394         if (e == EXEC_OUTPUT_INHERIT &&
395             o == EXEC_OUTPUT_INHERIT &&
396             i == EXEC_INPUT_NULL &&
397             !is_terminal_input(context->std_input) &&
398             getppid () != 1)
399                 return STDERR_FILENO;
400
401         /* Duplicate from stdout if possible */
402         if (e == o || e == EXEC_OUTPUT_INHERIT)
403                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
404
405         switch (e) {
406
407         case EXEC_OUTPUT_NULL:
408                 return open_null_as(O_WRONLY, STDERR_FILENO);
409
410         case EXEC_OUTPUT_TTY:
411                 if (is_terminal_input(i))
412                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
413
414                 /* We don't reset the terminal if this is just about output */
415                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
416
417         case EXEC_OUTPUT_SYSLOG:
418         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
419         case EXEC_OUTPUT_KMSG:
420         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
421         case EXEC_OUTPUT_JOURNAL:
422         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
423                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
424
425         case EXEC_OUTPUT_SOCKET:
426                 assert(socket_fd >= 0);
427                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
428
429         default:
430                 assert_not_reached("Unknown error type");
431         }
432 }
433
434 static int chown_terminal(int fd, uid_t uid) {
435         struct stat st;
436
437         assert(fd >= 0);
438
439         /* This might fail. What matters are the results. */
440         (void) fchown(fd, uid, -1);
441         (void) fchmod(fd, TTY_MODE);
442
443         if (fstat(fd, &st) < 0)
444                 return -errno;
445
446         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
447                 return -EPERM;
448
449         return 0;
450 }
451
452 static int setup_confirm_stdio(int *_saved_stdin,
453                                int *_saved_stdout) {
454         int fd = -1, saved_stdin, saved_stdout = -1, r;
455
456         assert(_saved_stdin);
457         assert(_saved_stdout);
458
459         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460         if (saved_stdin < 0)
461                 return -errno;
462
463         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
464         if (saved_stdout < 0) {
465                 r = errno;
466                 goto fail;
467         }
468
469         fd = acquire_terminal(
470                         "/dev/console",
471                         false,
472                         false,
473                         false,
474                         DEFAULT_CONFIRM_USEC);
475         if (fd < 0) {
476                 r = fd;
477                 goto fail;
478         }
479
480         r = chown_terminal(fd, getuid());
481         if (r < 0)
482                 goto fail;
483
484         if (dup2(fd, STDIN_FILENO) < 0) {
485                 r = -errno;
486                 goto fail;
487         }
488
489         if (dup2(fd, STDOUT_FILENO) < 0) {
490                 r = -errno;
491                 goto fail;
492         }
493
494         if (fd >= 2)
495                 close_nointr_nofail(fd);
496
497         *_saved_stdin = saved_stdin;
498         *_saved_stdout = saved_stdout;
499
500         return 0;
501
502 fail:
503         if (saved_stdout >= 0)
504                 close_nointr_nofail(saved_stdout);
505
506         if (saved_stdin >= 0)
507                 close_nointr_nofail(saved_stdin);
508
509         if (fd >= 0)
510                 close_nointr_nofail(fd);
511
512         return r;
513 }
514
515 static int write_confirm_message(const char *format, ...) {
516         int fd;
517         va_list ap;
518
519         assert(format);
520
521         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522         if (fd < 0)
523                 return fd;
524
525         va_start(ap, format);
526         vdprintf(fd, format, ap);
527         va_end(ap);
528
529         close_nointr_nofail(fd);
530
531         return 0;
532 }
533
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back onto stdin/stdout and closes the saved
 * copies. Negative saved descriptors are skipped. Returns 0 on
 * success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        /* First put both descriptors back in place ... */
        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... then get rid of the copies, regardless of the outcome */
        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
560
/* Asks on the console whether the command line given by argv shall
 * be executed. The accepted answers are 'y', 'n' and 's'; the chosen
 * character is stored in *response by ask().
 *
 * Returns the result of ask(), or a negative error if the console
 * could not be set up or memory ran out. Once the console has been
 * set up, stdin/stdout are always restored before returning. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't bail out directly: stdin/stdout still point
                 * to the console and the saved descriptors would
                 * leak. */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         int r;
662         assert(context);
663
664         /* Sets (but doesn't lookup) the uid and make sure we keep the
665          * capabilities while doing so. */
666
667         if (context->capabilities) {
668                 cap_t d;
669                 static const cap_value_t bits[] = {
670                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
671                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
672                 };
673
674                 /* First step: If we need to keep capabilities but
675                  * drop privileges we need to make sure we keep our
676                  * caps, whiel we drop privileges. */
677                 if (uid != 0) {
678                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
679
680                         if (prctl(PR_GET_SECUREBITS) != sb)
681                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682                                         return -errno;
683                 }
684
685                 /* Second step: set the capabilities. This will reduce
686                  * the capabilities to the minimum we need. */
687
688                 if (!(d = cap_dup(context->capabilities)))
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
693                         r = -errno;
694                         cap_free(d);
695                         return r;
696                 }
697
698                 if (cap_set_proc(d) < 0) {
699                         r = -errno;
700                         cap_free(d);
701                         return r;
702                 }
703
704                 cap_free(d);
705         }
706
707         /* Third step: actually set the uids */
708         if (setresuid(uid, uid, uid) < 0)
709                 return -errno;
710
711         /* At this point we should have all necessary capabilities but
712            are otherwise a normal user. However, the caps might got
713            corrupted due to the setresuid() so we need clean them up
714            later. This is done outside of this call. */
715
716         return 0;
717 }
718
719 #ifdef HAVE_PAM
720
721 static int null_conv(
722                 int num_msg,
723                 const struct pam_message **msg,
724                 struct pam_response **resp,
725                 void *appdata_ptr) {
726
727         /* We don't support conversations */
728
729         return PAM_CONV_ERR;
730 }
731
732 static int setup_pam(
733                 const char *name,
734                 const char *user,
735                 uid_t uid,
736                 const char *tty,
737                 char ***pam_env,
738                 int fds[], unsigned n_fds) {
739
740         static const struct pam_conv conv = {
741                 .conv = null_conv,
742                 .appdata_ptr = NULL
743         };
744
745         pam_handle_t *handle = NULL;
746         sigset_t ss, old_ss;
747         int pam_code = PAM_SUCCESS;
748         int err;
749         char **e = NULL;
750         bool close_session = false;
751         pid_t pam_pid = 0, parent_pid;
752
753         assert(name);
754         assert(user);
755         assert(pam_env);
756
757         /* We set up PAM in the parent process, then fork. The child
758          * will then stay around until killed via PR_GET_PDEATHSIG or
759          * systemd via the cgroup logic. It will then remove the PAM
760          * session again. The parent process will exec() the actual
761          * daemon. We do things this way to ensure that the main PID
762          * of the daemon is the one we initially fork()ed. */
763
764         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
765                 handle = NULL;
766                 goto fail;
767         }
768
769         if (tty)
770                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
771                         goto fail;
772
773         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
774                 goto fail;
775
776         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
777                 goto fail;
778
779         close_session = true;
780
781         if ((!(e = pam_getenvlist(handle)))) {
782                 pam_code = PAM_BUF_ERR;
783                 goto fail;
784         }
785
786         /* Block SIGTERM, so that we know that it won't get lost in
787          * the child */
788         if (sigemptyset(&ss) < 0 ||
789             sigaddset(&ss, SIGTERM) < 0 ||
790             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
791                 goto fail;
792
793         parent_pid = getpid();
794
795         if ((pam_pid = fork()) < 0)
796                 goto fail;
797
798         if (pam_pid == 0) {
799                 int sig;
800                 int r = EXIT_PAM;
801
802                 /* The child's job is to reset the PAM session on
803                  * termination */
804
805                 /* This string must fit in 10 chars (i.e. the length
806                  * of "/sbin/init"), to look pretty in /bin/ps */
807                 rename_process("(sd-pam)");
808
809                 /* Make sure we don't keep open the passed fds in this
810                 child. We assume that otherwise only those fds are
811                 open here that have been opened by PAM. */
812                 close_many(fds, n_fds);
813
814                 /* Drop privileges - we don't need any to pam_close_session
815                  * and this will make PR_SET_PDEATHSIG work in most cases.
816                  * If this fails, ignore the error - but expect sd-pam threads
817                  * to fail to exit normally */
818                 if (setresuid(uid, uid, uid) < 0)
819                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
820
821                 /* Wait until our parent died. This will only work if
822                  * the above setresuid() succeeds, otherwise the kernel
823                  * will not allow unprivileged parents kill their privileged
824                  * children this way. We rely on the control groups kill logic
825                  * to do the rest for us. */
826                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
827                         goto child_finish;
828
829                 /* Check if our parent process might already have
830                  * died? */
831                 if (getppid() == parent_pid) {
832                         for (;;) {
833                                 if (sigwait(&ss, &sig) < 0) {
834                                         if (errno == EINTR)
835                                                 continue;
836
837                                         goto child_finish;
838                                 }
839
840                                 assert(sig == SIGTERM);
841                                 break;
842                         }
843                 }
844
845                 /* If our parent died we'll end the session */
846                 if (getppid() != parent_pid)
847                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
848                                 goto child_finish;
849
850                 r = 0;
851
852         child_finish:
853                 pam_end(handle, pam_code | PAM_DATA_SILENT);
854                 _exit(r);
855         }
856
857         /* If the child was forked off successfully it will do all the
858          * cleanups, so forget about the handle here. */
859         handle = NULL;
860
861         /* Unblock SIGTERM again in the parent */
862         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
863                 goto fail;
864
865         /* We close the log explicitly here, since the PAM modules
866          * might have opened it, but we don't want this fd around. */
867         closelog();
868
869         *pam_env = e;
870         e = NULL;
871
872         return 0;
873
874 fail:
875         if (pam_code != PAM_SUCCESS)
876                 err = -EPERM;  /* PAM errors do not map to errno */
877         else
878                 err = -errno;
879
880         if (handle) {
881                 if (close_session)
882                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
883
884                 pam_end(handle, pam_code | PAM_DATA_SILENT);
885         }
886
887         strv_free(e);
888
889         closelog();
890
891         if (pam_pid > 1) {
892                 kill(pam_pid, SIGTERM);
893                 kill(pam_pid, SIGCONT);
894         }
895
896         return err;
897 }
898 #endif
899
/* Renames the current process to the file name part of path, put in
 * parentheses. Names longer than 8 characters are cut down to their
 * last 8 characters; if there is no file name, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *base;
        size_t len;
        char *w;

        /* The result has to fit into 10 characters (the length of
         * "/sbin/init") to look pretty in /bin/ps: two parentheses
         * plus at most 8 characters of the name. */

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more telling
                 * part, as the beginning might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        w = buf;
        *w++ = '(';
        memcpy(w, base, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
930
931 static int apply_seccomp(uint32_t *syscall_filter) {
932         static const struct sock_filter header[] = {
933                 VALIDATE_ARCHITECTURE,
934                 EXAMINE_SYSCALL
935         };
936         static const struct sock_filter footer[] = {
937                 _KILL_PROCESS
938         };
939
940         int i;
941         unsigned n;
942         struct sock_filter *f;
943         struct sock_fprog prog;
944
945         assert(syscall_filter);
946
947         /* First: count the syscalls to check for */
948         for (i = 0, n = 0; i < syscall_max(); i++)
949                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
950                         n++;
951
952         /* Second: build the filter program from a header the syscall
953          * matches and the footer */
954         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
955         memcpy(f, header, sizeof(header));
956
957         for (i = 0, n = 0; i < syscall_max(); i++)
958                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
959                         struct sock_filter item[] = {
960                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
961                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
962                         };
963
964                         assert_cc(ELEMENTSOF(item) == 2);
965
966                         f[ELEMENTSOF(header) + 2*n]  = item[0];
967                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
968
969                         n++;
970                 }
971
972         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
973
974         /* Third: install the filter */
975         zero(prog);
976         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
977         prog.filter = f;
978         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
979                 return -errno;
980
981         return 0;
982 }
983
984 int exec_spawn(ExecCommand *command,
985                char **argv,
986                const ExecContext *context,
987                int fds[], unsigned n_fds,
988                char **environment,
989                bool apply_permissions,
990                bool apply_chroot,
991                bool apply_tty_stdin,
992                bool confirm_spawn,
993                CGroupBonding *cgroup_bondings,
994                CGroupAttribute *cgroup_attributes,
995                const char *cgroup_suffix,
996                const char *unit_id,
997                int idle_pipe[2],
998                pid_t *ret) {
999
1000         pid_t pid;
1001         int r;
1002         char *line;
1003         int socket_fd;
1004         char _cleanup_strv_free_ **files_env = NULL;
1005
1006         assert(command);
1007         assert(context);
1008         assert(ret);
1009         assert(fds || n_fds <= 0);
1010
1011         if (context->std_input == EXEC_INPUT_SOCKET ||
1012             context->std_output == EXEC_OUTPUT_SOCKET ||
1013             context->std_error == EXEC_OUTPUT_SOCKET) {
1014
1015                 if (n_fds != 1)
1016                         return -EINVAL;
1017
1018                 socket_fd = fds[0];
1019
1020                 fds = NULL;
1021                 n_fds = 0;
1022         } else
1023                 socket_fd = -1;
1024
1025         r = exec_context_load_environment(context, &files_env);
1026         if (r < 0) {
1027                 log_struct(LOG_ERR,
1028                            "UNIT=%s", unit_id,
1029                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1030                            "ERRNO=%d", -r,
1031                            NULL);
1032                 return r;
1033         }
1034
1035         if (!argv)
1036                 argv = command->argv;
1037
1038         line = exec_command_line(argv);
1039         if (!line)
1040                 return log_oom();
1041
1042         log_struct(LOG_DEBUG,
1043                    "UNIT=%s", unit_id,
1044                    "MESSAGE=About to execute %s", line,
1045                    NULL);
1046         free(line);
1047
1048         r = cgroup_bonding_realize_list(cgroup_bondings);
1049         if (r < 0)
1050                 return r;
1051
1052         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1053
1054         pid = fork();
1055         if (pid < 0)
1056                 return -errno;
1057
1058         if (pid == 0) {
1059                 int i, err;
1060                 sigset_t ss;
1061                 const char *username = NULL, *home = NULL;
1062                 uid_t uid = (uid_t) -1;
1063                 gid_t gid = (gid_t) -1;
1064                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1065                         **final_env = NULL, **final_argv = NULL;
1066                 unsigned n_env = 0;
1067                 bool set_access = false;
1068
1069                 /* child */
1070
1071                 rename_process_from_path(command->path);
1072
1073                 /* We reset exactly these signals, since they are the
1074                  * only ones we set to SIG_IGN in the main daemon. All
1075                  * others we leave untouched because we set them to
1076                  * SIG_DFL or a valid handler initially, both of which
1077                  * will be demoted to SIG_DFL. */
1078                 default_signals(SIGNALS_CRASH_HANDLER,
1079                                 SIGNALS_IGNORE, -1);
1080
1081                 if (context->ignore_sigpipe)
1082                         ignore_signals(SIGPIPE, -1);
1083
1084                 assert_se(sigemptyset(&ss) == 0);
1085                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1086                         err = -errno;
1087                         r = EXIT_SIGNAL_MASK;
1088                         goto fail_child;
1089                 }
1090
1091                 if (idle_pipe) {
1092                         if (idle_pipe[1] >= 0)
1093                                 close_nointr_nofail(idle_pipe[1]);
1094                         if (idle_pipe[0] >= 0) {
1095                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1096                                 close_nointr_nofail(idle_pipe[0]);
1097                         }
1098                 }
1099
1100                 /* Close sockets very early to make sure we don't
1101                  * block init reexecution because it cannot bind its
1102                  * sockets */
1103                 log_forget_fds();
1104                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1105                                            socket_fd >= 0 ? 1 : n_fds);
1106                 if (err < 0) {
1107                         r = EXIT_FDS;
1108                         goto fail_child;
1109                 }
1110
1111                 if (!context->same_pgrp)
1112                         if (setsid() < 0) {
1113                                 err = -errno;
1114                                 r = EXIT_SETSID;
1115                                 goto fail_child;
1116                         }
1117
1118                 if (context->tcpwrap_name) {
1119                         if (socket_fd >= 0)
1120                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1121                                         err = -EACCES;
1122                                         r = EXIT_TCPWRAP;
1123                                         goto fail_child;
1124                                 }
1125
1126                         for (i = 0; i < (int) n_fds; i++) {
1127                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1128                                         err = -EACCES;
1129                                         r = EXIT_TCPWRAP;
1130                                         goto fail_child;
1131                                 }
1132                         }
1133                 }
1134
1135                 exec_context_tty_reset(context);
1136
1137                 if (confirm_spawn) {
1138                         char response;
1139
1140                         err = ask_for_confirmation(&response, argv);
1141                         if (err == -ETIMEDOUT)
1142                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1143                         else if (err < 0)
1144                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1145                         else if (response == 's') {
1146                                 write_confirm_message("Skipping execution.\n");
1147                                 err = -ECANCELED;
1148                                 r = EXIT_CONFIRM;
1149                                 goto fail_child;
1150                         } else if (response == 'n') {
1151                                 write_confirm_message("Failing execution.\n");
1152                                 err = r = 0;
1153                                 goto fail_child;
1154                         }
1155                 }
1156
1157                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1158                  * must sure to drop O_NONBLOCK */
1159                 if (socket_fd >= 0)
1160                         fd_nonblock(socket_fd, false);
1161
1162                 err = setup_input(context, socket_fd, apply_tty_stdin);
1163                 if (err < 0) {
1164                         r = EXIT_STDIN;
1165                         goto fail_child;
1166                 }
1167
1168                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1169                 if (err < 0) {
1170                         r = EXIT_STDOUT;
1171                         goto fail_child;
1172                 }
1173
1174                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1175                 if (err < 0) {
1176                         r = EXIT_STDERR;
1177                         goto fail_child;
1178                 }
1179
1180                 if (cgroup_bondings) {
1181                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1182                         if (err < 0) {
1183                                 r = EXIT_CGROUP;
1184                                 goto fail_child;
1185                         }
1186                 }
1187
1188                 if (context->oom_score_adjust_set) {
1189                         char t[16];
1190
1191                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1192                         char_array_0(t);
1193
1194                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1195                                 err = -errno;
1196                                 r = EXIT_OOM_ADJUST;
1197                                 goto fail_child;
1198                         }
1199                 }
1200
1201                 if (context->nice_set)
1202                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1203                                 err = -errno;
1204                                 r = EXIT_NICE;
1205                                 goto fail_child;
1206                         }
1207
1208                 if (context->cpu_sched_set) {
1209                         struct sched_param param;
1210
1211                         zero(param);
1212                         param.sched_priority = context->cpu_sched_priority;
1213
1214                         if (sched_setscheduler(0, context->cpu_sched_policy |
1215                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1216                                 err = -errno;
1217                                 r = EXIT_SETSCHEDULER;
1218                                 goto fail_child;
1219                         }
1220                 }
1221
1222                 if (context->cpuset)
1223                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1224                                 err = -errno;
1225                                 r = EXIT_CPUAFFINITY;
1226                                 goto fail_child;
1227                         }
1228
1229                 if (context->ioprio_set)
1230                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1231                                 err = -errno;
1232                                 r = EXIT_IOPRIO;
1233                                 goto fail_child;
1234                         }
1235
1236                 if (context->timer_slack_nsec != (nsec_t) -1)
1237                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1238                                 err = -errno;
1239                                 r = EXIT_TIMERSLACK;
1240                                 goto fail_child;
1241                         }
1242
1243                 if (context->utmp_id)
1244                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1245
1246                 if (context->user) {
1247                         username = context->user;
1248                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1249                         if (err < 0) {
1250                                 r = EXIT_USER;
1251                                 goto fail_child;
1252                         }
1253
1254                         if (is_terminal_input(context->std_input)) {
1255                                 err = chown_terminal(STDIN_FILENO, uid);
1256                                 if (err < 0) {
1257                                         r = EXIT_STDIN;
1258                                         goto fail_child;
1259                                 }
1260                         }
1261
1262                         if (cgroup_bondings && context->control_group_modify) {
1263                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1264                                 if (err >= 0)
1265                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1266                                 if (err < 0) {
1267                                         r = EXIT_CGROUP;
1268                                         goto fail_child;
1269                                 }
1270
1271                                 set_access = true;
1272                         }
1273                 }
1274
1275                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1276                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1277                         if (err < 0) {
1278                                 r = EXIT_CGROUP;
1279                                 goto fail_child;
1280                         }
1281                 }
1282
1283                 if (apply_permissions) {
1284                         err = enforce_groups(context, username, gid);
1285                         if (err < 0) {
1286                                 r = EXIT_GROUP;
1287                                 goto fail_child;
1288                         }
1289                 }
1290
1291                 umask(context->umask);
1292
1293 #ifdef HAVE_PAM
1294                 if (apply_permissions && context->pam_name && username) {
1295                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1296                         if (err < 0) {
1297                                 r = EXIT_PAM;
1298                                 goto fail_child;
1299                         }
1300                 }
1301 #endif
1302                 if (context->private_network) {
1303                         if (unshare(CLONE_NEWNET) < 0) {
1304                                 err = -errno;
1305                                 r = EXIT_NETWORK;
1306                                 goto fail_child;
1307                         }
1308
1309                         loopback_setup();
1310                 }
1311
1312                 if (strv_length(context->read_write_dirs) > 0 ||
1313                     strv_length(context->read_only_dirs) > 0 ||
1314                     strv_length(context->inaccessible_dirs) > 0 ||
1315                     context->mount_flags != 0 ||
1316                     context->private_tmp) {
1317                         err = setup_namespace(context->read_write_dirs,
1318                                               context->read_only_dirs,
1319                                               context->inaccessible_dirs,
1320                                               context->private_tmp,
1321                                               context->mount_flags);
1322                         if (err < 0) {
1323                                 r = EXIT_NAMESPACE;
1324                                 goto fail_child;
1325                         }
1326                 }
1327
1328                 if (apply_chroot) {
1329                         if (context->root_directory)
1330                                 if (chroot(context->root_directory) < 0) {
1331                                         err = -errno;
1332                                         r = EXIT_CHROOT;
1333                                         goto fail_child;
1334                                 }
1335
1336                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1337                                 err = -errno;
1338                                 r = EXIT_CHDIR;
1339                                 goto fail_child;
1340                         }
1341                 } else {
1342                         char _cleanup_free_ *d = NULL;
1343
1344                         if (asprintf(&d, "%s/%s",
1345                                      context->root_directory ? context->root_directory : "",
1346                                      context->working_directory ? context->working_directory : "") < 0) {
1347                                 err = -ENOMEM;
1348                                 r = EXIT_MEMORY;
1349                                 goto fail_child;
1350                         }
1351
1352                         if (chdir(d) < 0) {
1353                                 err = -errno;
1354                                 r = EXIT_CHDIR;
1355                                 goto fail_child;
1356                         }
1357                 }
1358
1359                 /* We repeat the fd closing here, to make sure that
1360                  * nothing is leaked from the PAM modules */
1361                 err = close_all_fds(fds, n_fds);
1362                 if (err >= 0)
1363                         err = shift_fds(fds, n_fds);
1364                 if (err >= 0)
1365                         err = flags_fds(fds, n_fds, context->non_blocking);
1366                 if (err < 0) {
1367                         r = EXIT_FDS;
1368                         goto fail_child;
1369                 }
1370
1371                 if (apply_permissions) {
1372
1373                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1374                                 if (!context->rlimit[i])
1375                                         continue;
1376
1377                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1378                                         err = -errno;
1379                                         r = EXIT_LIMITS;
1380                                         goto fail_child;
1381                                 }
1382                         }
1383
1384                         if (context->capability_bounding_set_drop) {
1385                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1386                                 if (err < 0) {
1387                                         r = EXIT_CAPABILITIES;
1388                                         goto fail_child;
1389                                 }
1390                         }
1391
1392                         if (context->user) {
1393                                 err = enforce_user(context, uid);
1394                                 if (err < 0) {
1395                                         r = EXIT_USER;
1396                                         goto fail_child;
1397                                 }
1398                         }
1399
1400                         /* PR_GET_SECUREBITS is not privileged, while
1401                          * PR_SET_SECUREBITS is. So to suppress
1402                          * potential EPERMs we'll try not to call
1403                          * PR_SET_SECUREBITS unless necessary. */
1404                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1405                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1406                                         err = -errno;
1407                                         r = EXIT_SECUREBITS;
1408                                         goto fail_child;
1409                                 }
1410
1411                         if (context->capabilities)
1412                                 if (cap_set_proc(context->capabilities) < 0) {
1413                                         err = -errno;
1414                                         r = EXIT_CAPABILITIES;
1415                                         goto fail_child;
1416                                 }
1417
1418                         if (context->no_new_privileges)
1419                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1420                                         err = -errno;
1421                                         r = EXIT_NO_NEW_PRIVILEGES;
1422                                         goto fail_child;
1423                                 }
1424
1425                         if (context->syscall_filter) {
1426                                 err = apply_seccomp(context->syscall_filter);
1427                                 if (err < 0) {
1428                                         r = EXIT_SECCOMP;
1429                                         goto fail_child;
1430                                 }
1431                         }
1432                 }
1433
1434                 if (!(our_env = new0(char*, 7))) {
1435                         err = -ENOMEM;
1436                         r = EXIT_MEMORY;
1437                         goto fail_child;
1438                 }
1439
1440                 if (n_fds > 0)
1441                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1442                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1443                                 err = -ENOMEM;
1444                                 r = EXIT_MEMORY;
1445                                 goto fail_child;
1446                         }
1447
1448                 if (home)
1449                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1450                                 err = -ENOMEM;
1451                                 r = EXIT_MEMORY;
1452                                 goto fail_child;
1453                         }
1454
1455                 if (username)
1456                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1457                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1458                                 err = -ENOMEM;
1459                                 r = EXIT_MEMORY;
1460                                 goto fail_child;
1461                         }
1462
1463                 if (is_terminal_input(context->std_input) ||
1464                     context->std_output == EXEC_OUTPUT_TTY ||
1465                     context->std_error == EXEC_OUTPUT_TTY)
1466                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1467                                 err = -ENOMEM;
1468                                 r = EXIT_MEMORY;
1469                                 goto fail_child;
1470                         }
1471
1472                 assert(n_env <= 7);
1473
1474                 if (!(final_env = strv_env_merge(
1475                                       5,
1476                                       environment,
1477                                       our_env,
1478                                       context->environment,
1479                                       files_env,
1480                                       pam_env,
1481                                       NULL))) {
1482                         err = -ENOMEM;
1483                         r = EXIT_MEMORY;
1484                         goto fail_child;
1485                 }
1486
1487                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1488                         err = -ENOMEM;
1489                         r = EXIT_MEMORY;
1490                         goto fail_child;
1491                 }
1492
1493                 final_env = strv_env_clean(final_env);
1494
1495                 execve(command->path, final_argv, final_env);
1496                 err = -errno;
1497                 r = EXIT_EXEC;
1498
1499         fail_child:
1500                 if (r != 0) {
1501                         log_open();
1502                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1503                                    "EXECUTABLE=%s", command->path,
1504                                    "MESSAGE=Failed at step %s spawning %s: %s",
1505                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1506                                           command->path, strerror(-err),
1507                                    "ERRNO=%d", -err,
1508                                    NULL);
1509                         log_close();
1510                 }
1511
1512                 _exit(r);
1513         }
1514
1515         log_struct(LOG_DEBUG,
1516                    "UNIT=%s", unit_id,
1517                    "MESSAGE=Forked %s as %lu",
1518                           command->path, (unsigned long) pid,
1519                    NULL);
1520
1521         /* We add the new process to the cgroup both in the child (so
1522          * that we can be sure that no user code is ever executed
1523          * outside of the cgroup) and in the parent (so that we can be
1524          * sure that when we kill the cgroup the process will be
1525          * killed too). */
1526         if (cgroup_bondings)
1527                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1528
1529         exec_status_start(&command->exec_status, pid);
1530
1531         *ret = pid;
1532         return 0;
1533 }
1534
1535 void exec_context_init(ExecContext *c) {
1536         assert(c);
1537
1538         c->umask = 0022;
1539         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540         c->cpu_sched_policy = SCHED_OTHER;
1541         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542         c->syslog_level_prefix = true;
1543         c->control_group_persistent = -1;
1544         c->ignore_sigpipe = true;
1545         c->timer_slack_nsec = (nsec_t) -1;
1546 }
1547
1548 void exec_context_done(ExecContext *c) {
1549         unsigned l;
1550
1551         assert(c);
1552
1553         strv_free(c->environment);
1554         c->environment = NULL;
1555
1556         strv_free(c->environment_files);
1557         c->environment_files = NULL;
1558
1559         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1560                 free(c->rlimit[l]);
1561                 c->rlimit[l] = NULL;
1562         }
1563
1564         free(c->working_directory);
1565         c->working_directory = NULL;
1566         free(c->root_directory);
1567         c->root_directory = NULL;
1568
1569         free(c->tty_path);
1570         c->tty_path = NULL;
1571
1572         free(c->tcpwrap_name);
1573         c->tcpwrap_name = NULL;
1574
1575         free(c->syslog_identifier);
1576         c->syslog_identifier = NULL;
1577
1578         free(c->user);
1579         c->user = NULL;
1580
1581         free(c->group);
1582         c->group = NULL;
1583
1584         strv_free(c->supplementary_groups);
1585         c->supplementary_groups = NULL;
1586
1587         free(c->pam_name);
1588         c->pam_name = NULL;
1589
1590         if (c->capabilities) {
1591                 cap_free(c->capabilities);
1592                 c->capabilities = NULL;
1593         }
1594
1595         strv_free(c->read_only_dirs);
1596         c->read_only_dirs = NULL;
1597
1598         strv_free(c->read_write_dirs);
1599         c->read_write_dirs = NULL;
1600
1601         strv_free(c->inaccessible_dirs);
1602         c->inaccessible_dirs = NULL;
1603
1604         if (c->cpuset)
1605                 CPU_FREE(c->cpuset);
1606
1607         free(c->utmp_id);
1608         c->utmp_id = NULL;
1609
1610         free(c->syscall_filter);
1611         c->syscall_filter = NULL;
1612 }
1613
1614 void exec_command_done(ExecCommand *c) {
1615         assert(c);
1616
1617         free(c->path);
1618         c->path = NULL;
1619
1620         strv_free(c->argv);
1621         c->argv = NULL;
1622 }
1623
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1625         unsigned i;
1626
1627         for (i = 0; i < n; i++)
1628                 exec_command_done(c+i);
1629 }
1630
1631 void exec_command_free_list(ExecCommand *c) {
1632         ExecCommand *i;
1633
1634         while ((i = c)) {
1635                 LIST_REMOVE(ExecCommand, command, c, i);
1636                 exec_command_done(i);
1637                 free(i);
1638         }
1639 }
1640
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1642         unsigned i;
1643
1644         for (i = 0; i < n; i++) {
1645                 exec_command_free_list(c[i]);
1646                 c[i] = NULL;
1647         }
1648 }
1649
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651         char **i, **r = NULL;
1652
1653         assert(c);
1654         assert(l);
1655
1656         STRV_FOREACH(i, c->environment_files) {
1657                 char *fn;
1658                 int k;
1659                 bool ignore = false;
1660                 char **p;
1661
1662                 fn = *i;
1663
1664                 if (fn[0] == '-') {
1665                         ignore = true;
1666                         fn ++;
1667                 }
1668
1669                 if (!path_is_absolute(fn)) {
1670
1671                         if (ignore)
1672                                 continue;
1673
1674                         strv_free(r);
1675                         return -EINVAL;
1676                 }
1677
1678                 if ((k = load_env_file(fn, &p)) < 0) {
1679
1680                         if (ignore)
1681                                 continue;
1682
1683                         strv_free(r);
1684                         return k;
1685                 }
1686
1687                 if (r == NULL)
1688                         r = p;
1689                 else {
1690                         char **m;
1691
1692                         m = strv_env_merge(2, r, p);
1693                         strv_free(r);
1694                         strv_free(p);
1695
1696                         if (!m)
1697                                 return -ENOMEM;
1698
1699                         r = m;
1700                 }
1701         }
1702
1703         *l = r;
1704
1705         return 0;
1706 }
1707
1708 static void strv_fprintf(FILE *f, char **l) {
1709         char **g;
1710
1711         assert(f);
1712
1713         STRV_FOREACH(g, l)
1714                 fprintf(f, " %s", *g);
1715 }
1716
1717 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1718         char ** e;
1719         unsigned i;
1720
1721         assert(c);
1722         assert(f);
1723
1724         if (!prefix)
1725                 prefix = "";
1726
1727         fprintf(f,
1728                 "%sUMask: %04o\n"
1729                 "%sWorkingDirectory: %s\n"
1730                 "%sRootDirectory: %s\n"
1731                 "%sNonBlocking: %s\n"
1732                 "%sPrivateTmp: %s\n"
1733                 "%sControlGroupModify: %s\n"
1734                 "%sControlGroupPersistent: %s\n"
1735                 "%sPrivateNetwork: %s\n"
1736                 "%sIgnoreSIGPIPE: %s\n",
1737                 prefix, c->umask,
1738                 prefix, c->working_directory ? c->working_directory : "/",
1739                 prefix, c->root_directory ? c->root_directory : "/",
1740                 prefix, yes_no(c->non_blocking),
1741                 prefix, yes_no(c->private_tmp),
1742                 prefix, yes_no(c->control_group_modify),
1743                 prefix, yes_no(c->control_group_persistent),
1744                 prefix, yes_no(c->private_network),
1745                 prefix, yes_no(c->ignore_sigpipe));
1746
1747         STRV_FOREACH(e, c->environment)
1748                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1749
1750         STRV_FOREACH(e, c->environment_files)
1751                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1752
1753         if (c->tcpwrap_name)
1754                 fprintf(f,
1755                         "%sTCPWrapName: %s\n",
1756                         prefix, c->tcpwrap_name);
1757
1758         if (c->nice_set)
1759                 fprintf(f,
1760                         "%sNice: %i\n",
1761                         prefix, c->nice);
1762
1763         if (c->oom_score_adjust_set)
1764                 fprintf(f,
1765                         "%sOOMScoreAdjust: %i\n",
1766                         prefix, c->oom_score_adjust);
1767
1768         for (i = 0; i < RLIM_NLIMITS; i++)
1769                 if (c->rlimit[i])
1770                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1771
1772         if (c->ioprio_set) {
1773                 char *class_str;
1774                 int r;
1775
1776                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1777                 if (r < 0)
1778                         class_str = NULL;
1779                 fprintf(f,
1780                         "%sIOSchedulingClass: %s\n"
1781                         "%sIOPriority: %i\n",
1782                         prefix, strna(class_str),
1783                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1784                 free(class_str);
1785         }
1786
1787         if (c->cpu_sched_set) {
1788                 char *policy_str;
1789                 int r;
1790
1791                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1792                 if (r < 0)
1793                         policy_str = NULL;
1794                 fprintf(f,
1795                         "%sCPUSchedulingPolicy: %s\n"
1796                         "%sCPUSchedulingPriority: %i\n"
1797                         "%sCPUSchedulingResetOnFork: %s\n",
1798                         prefix, strna(policy_str),
1799                         prefix, c->cpu_sched_priority,
1800                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1801                 free(policy_str);
1802         }
1803
1804         if (c->cpuset) {
1805                 fprintf(f, "%sCPUAffinity:", prefix);
1806                 for (i = 0; i < c->cpuset_ncpus; i++)
1807                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1808                                 fprintf(f, " %i", i);
1809                 fputs("\n", f);
1810         }
1811
1812         if (c->timer_slack_nsec != (nsec_t) -1)
1813                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1814
1815         fprintf(f,
1816                 "%sStandardInput: %s\n"
1817                 "%sStandardOutput: %s\n"
1818                 "%sStandardError: %s\n",
1819                 prefix, exec_input_to_string(c->std_input),
1820                 prefix, exec_output_to_string(c->std_output),
1821                 prefix, exec_output_to_string(c->std_error));
1822
1823         if (c->tty_path)
1824                 fprintf(f,
1825                         "%sTTYPath: %s\n"
1826                         "%sTTYReset: %s\n"
1827                         "%sTTYVHangup: %s\n"
1828                         "%sTTYVTDisallocate: %s\n",
1829                         prefix, c->tty_path,
1830                         prefix, yes_no(c->tty_reset),
1831                         prefix, yes_no(c->tty_vhangup),
1832                         prefix, yes_no(c->tty_vt_disallocate));
1833
1834         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1835             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1836             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1837             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1838                 char *fac_str, *lvl_str;
1839                 int r;
1840
1841                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1842                 if (r < 0)
1843                         fac_str = NULL;
1844
1845                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1846                 if (r < 0)
1847                         lvl_str = NULL;
1848
1849                 fprintf(f,
1850                         "%sSyslogFacility: %s\n"
1851                         "%sSyslogLevel: %s\n",
1852                         prefix, strna(fac_str),
1853                         prefix, strna(lvl_str));
1854                 free(lvl_str);
1855                 free(fac_str);
1856         }
1857
1858         if (c->capabilities) {
1859                 char *t;
1860                 if ((t = cap_to_text(c->capabilities, NULL))) {
1861                         fprintf(f, "%sCapabilities: %s\n",
1862                                 prefix, t);
1863                         cap_free(t);
1864                 }
1865         }
1866
1867         if (c->secure_bits)
1868                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1869                         prefix,
1870                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1871                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1872                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1873                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1874                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1875                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1876
1877         if (c->capability_bounding_set_drop) {
1878                 unsigned long l;
1879                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1880
1881                 for (l = 0; l <= cap_last_cap(); l++)
1882                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1883                                 char *t;
1884
1885                                 if ((t = cap_to_name(l))) {
1886                                         fprintf(f, " %s", t);
1887                                         cap_free(t);
1888                                 }
1889                         }
1890
1891                 fputs("\n", f);
1892         }
1893
1894         if (c->user)
1895                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1896         if (c->group)
1897                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1898
1899         if (strv_length(c->supplementary_groups) > 0) {
1900                 fprintf(f, "%sSupplementaryGroups:", prefix);
1901                 strv_fprintf(f, c->supplementary_groups);
1902                 fputs("\n", f);
1903         }
1904
1905         if (c->pam_name)
1906                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1907
1908         if (strv_length(c->read_write_dirs) > 0) {
1909                 fprintf(f, "%sReadWriteDirs:", prefix);
1910                 strv_fprintf(f, c->read_write_dirs);
1911                 fputs("\n", f);
1912         }
1913
1914         if (strv_length(c->read_only_dirs) > 0) {
1915                 fprintf(f, "%sReadOnlyDirs:", prefix);
1916                 strv_fprintf(f, c->read_only_dirs);
1917                 fputs("\n", f);
1918         }
1919
1920         if (strv_length(c->inaccessible_dirs) > 0) {
1921                 fprintf(f, "%sInaccessibleDirs:", prefix);
1922                 strv_fprintf(f, c->inaccessible_dirs);
1923                 fputs("\n", f);
1924         }
1925
1926         if (c->utmp_id)
1927                 fprintf(f,
1928                         "%sUtmpIdentifier: %s\n",
1929                         prefix, c->utmp_id);
1930 }
1931
1932 void exec_status_start(ExecStatus *s, pid_t pid) {
1933         assert(s);
1934
1935         zero(*s);
1936         s->pid = pid;
1937         dual_timestamp_get(&s->start_timestamp);
1938 }
1939
1940 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1941         assert(s);
1942
1943         if (s->pid && s->pid != pid)
1944                 zero(*s);
1945
1946         s->pid = pid;
1947         dual_timestamp_get(&s->exit_timestamp);
1948
1949         s->code = code;
1950         s->status = status;
1951
1952         if (context) {
1953                 if (context->utmp_id)
1954                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1955
1956                 exec_context_tty_reset(context);
1957         }
1958 }
1959
1960 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1961         char buf[FORMAT_TIMESTAMP_MAX];
1962
1963         assert(s);
1964         assert(f);
1965
1966         if (!prefix)
1967                 prefix = "";
1968
1969         if (s->pid <= 0)
1970                 return;
1971
1972         fprintf(f,
1973                 "%sPID: %lu\n",
1974                 prefix, (unsigned long) s->pid);
1975
1976         if (s->start_timestamp.realtime > 0)
1977                 fprintf(f,
1978                         "%sStart Timestamp: %s\n",
1979                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1980
1981         if (s->exit_timestamp.realtime > 0)
1982                 fprintf(f,
1983                         "%sExit Timestamp: %s\n"
1984                         "%sExit Code: %s\n"
1985                         "%sExit Status: %i\n",
1986                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1987                         prefix, sigchld_code_to_string(s->code),
1988                         prefix, s->status);
1989 }
1990
1991 char *exec_command_line(char **argv) {
1992         size_t k;
1993         char *n, *p, **a;
1994         bool first = true;
1995
1996         assert(argv);
1997
1998         k = 1;
1999         STRV_FOREACH(a, argv)
2000                 k += strlen(*a)+3;
2001
2002         if (!(n = new(char, k)))
2003                 return NULL;
2004
2005         p = n;
2006         STRV_FOREACH(a, argv) {
2007
2008                 if (!first)
2009                         *(p++) = ' ';
2010                 else
2011                         first = false;
2012
2013                 if (strpbrk(*a, WHITESPACE)) {
2014                         *(p++) = '\'';
2015                         p = stpcpy(p, *a);
2016                         *(p++) = '\'';
2017                 } else
2018                         p = stpcpy(p, *a);
2019
2020         }
2021
2022         *p = 0;
2023
2024         /* FIXME: this doesn't really handle arguments that have
2025          * spaces and ticks in them */
2026
2027         return n;
2028 }
2029
2030 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2031         char *p2;
2032         const char *prefix2;
2033
2034         char *cmd;
2035
2036         assert(c);
2037         assert(f);
2038
2039         if (!prefix)
2040                 prefix = "";
2041         p2 = strappend(prefix, "\t");
2042         prefix2 = p2 ? p2 : prefix;
2043
2044         cmd = exec_command_line(c->argv);
2045
2046         fprintf(f,
2047                 "%sCommand Line: %s\n",
2048                 prefix, cmd ? cmd : strerror(ENOMEM));
2049
2050         free(cmd);
2051
2052         exec_status_dump(&c->exec_status, f, prefix2);
2053
2054         free(p2);
2055 }
2056
2057 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2058         assert(f);
2059
2060         if (!prefix)
2061                 prefix = "";
2062
2063         LIST_FOREACH(command, c, c)
2064                 exec_command_dump(c, f, prefix);
2065 }
2066
2067 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2068         ExecCommand *end;
2069
2070         assert(l);
2071         assert(e);
2072
2073         if (*l) {
2074                 /* It's kind of important, that we keep the order here */
2075                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2076                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2077         } else
2078               *l = e;
2079 }
2080
2081 int exec_command_set(ExecCommand *c, const char *path, ...) {
2082         va_list ap;
2083         char **l, *p;
2084
2085         assert(c);
2086         assert(path);
2087
2088         va_start(ap, path);
2089         l = strv_new_ap(path, ap);
2090         va_end(ap);
2091
2092         if (!l)
2093                 return -ENOMEM;
2094
2095         if (!(p = strdup(path))) {
2096                 strv_free(l);
2097                 return -ENOMEM;
2098         }
2099
2100         free(c->path);
2101         c->path = p;
2102
2103         strv_free(c->argv);
2104         c->argv = l;
2105
2106         return 0;
2107 }
2108
2109 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2110         [EXEC_INPUT_NULL] = "null",
2111         [EXEC_INPUT_TTY] = "tty",
2112         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2113         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2114         [EXEC_INPUT_SOCKET] = "socket"
2115 };
2116
2117 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2118
2119 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2120         [EXEC_OUTPUT_INHERIT] = "inherit",
2121         [EXEC_OUTPUT_NULL] = "null",
2122         [EXEC_OUTPUT_TTY] = "tty",
2123         [EXEC_OUTPUT_SYSLOG] = "syslog",
2124         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2125         [EXEC_OUTPUT_KMSG] = "kmsg",
2126         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2127         [EXEC_OUTPUT_JOURNAL] = "journal",
2128         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2129         [EXEC_OUTPUT_SOCKET] = "socket"
2130 };
2131
2132 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);