chiark / gitweb /
76284700d7b3044b425775587b8296d62edd9038
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42
43 #ifdef HAVE_PAM
44 #include <security/pam_appl.h>
45 #endif
46
47 #include "execute.h"
48 #include "strv.h"
49 #include "macro.h"
50 #include "capability.h"
51 #include "util.h"
52 #include "log.h"
53 #include "sd-messages.h"
54 #include "ioprio.h"
55 #include "securebits.h"
56 #include "cgroup.h"
57 #include "namespace.h"
58 #include "tcpwrap.h"
59 #include "exit-status.h"
60 #include "missing.h"
61 #include "utmp-wtmp.h"
62 #include "def.h"
63 #include "loopback-setup.h"
64 #include "path-util.h"
65 #include "syscall-list.h"
66
67 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
68
69 /* This assumes there is a 'tty' group */
70 #define TTY_MODE 0620
71
/* Moves the passed file descriptors so that entry i of the array ends
 * up as descriptor number i+3. The array is rewritten in place.
 *
 * Each misplaced descriptor is duplicated with F_DUPFD to the lowest
 * free number >= i+3 and the old one is closed. If the wanted number
 * was still occupied, the scan is repeated starting at the first entry
 * that missed its target.
 *
 * Returns 0 on success, or -errno if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        const int count = (int) n_fds;
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        while (first >= 0) {
                int retry_at = -1;
                int slot;

                for (slot = first; slot < count; slot++) {
                        const int wanted = slot + 3;
                        int moved;

                        /* Nothing to do if it already sits where we want it */
                        if (fds[slot] == wanted)
                                continue;

                        moved = fcntl(fds[slot], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[slot]);
                        fds[slot] = moved;

                        /* The target number was taken; remember the
                         * first such entry for the next pass */
                        if (moved != wanted && retry_at < 0)
                                retry_at = slot;
                }

                /* -1 means every entry hit its target: we are done */
                first = retry_at;
        }

        return 0;
}
115
/* Applies the requested O_NONBLOCK setting to every descriptor in the
 * array and clears FD_CLOEXEC on each of them, since these descriptors
 * are meant to be passed on to child processes.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;

        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped, regardless of "nonblock" */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
142
143 static const char *tty_path(const ExecContext *context) {
144         assert(context);
145
146         if (context->tty_path)
147                 return context->tty_path;
148
149         return "/dev/console";
150 }
151
152 void exec_context_tty_reset(const ExecContext *context) {
153         assert(context);
154
155         if (context->tty_vhangup)
156                 terminal_vhangup(tty_path(context));
157
158         if (context->tty_reset)
159                 reset_terminal(tty_path(context));
160
161         if (context->tty_vt_disallocate && context->tty_path)
162                 vt_disallocate(context->tty_path);
163 }
164
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd.
 *
 * Returns nfd on success, -errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky: we already got the number we were after */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(null_fd);

        return result;
}
181
182 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
183         int fd, r;
184         union sockaddr_union sa;
185
186         assert(context);
187         assert(output < _EXEC_OUTPUT_MAX);
188         assert(ident);
189         assert(nfd >= 0);
190
191         fd = socket(AF_UNIX, SOCK_STREAM, 0);
192         if (fd < 0)
193                 return -errno;
194
195         zero(sa);
196         sa.un.sun_family = AF_UNIX;
197         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
198
199         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
200         if (r < 0) {
201                 close_nointr_nofail(fd);
202                 return -errno;
203         }
204
205         if (shutdown(fd, SHUT_RD) < 0) {
206                 close_nointr_nofail(fd);
207                 return -errno;
208         }
209
210         dprintf(fd,
211                 "%s\n"
212                 "%s\n"
213                 "%i\n"
214                 "%i\n"
215                 "%i\n"
216                 "%i\n"
217                 "%i\n",
218                 context->syslog_identifier ? context->syslog_identifier : ident,
219                 unit_id,
220                 context->syslog_priority,
221                 !!context->syslog_level_prefix,
222                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
224                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
225
226         if (fd != nfd) {
227                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
228                 close_nointr_nofail(fd);
229         } else
230                 r = nfd;
231
232         return r;
233 }
/* Opens the terminal at "path" via open_terminal(), passing "mode"
 * combined with O_NOCTTY, and makes it available as descriptor nfd.
 *
 * Returns nfd on success; otherwise the negative value from
 * open_terminal() or -errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                result = -errno;
        else
                result = nfd;

        close_nointr_nofail(term_fd);

        return result;
}
251
252 static bool is_terminal_input(ExecInput i) {
253         return
254                 i == EXEC_INPUT_TTY ||
255                 i == EXEC_INPUT_TTY_FORCE ||
256                 i == EXEC_INPUT_TTY_FAIL;
257 }
258
259 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
260
261         if (is_terminal_input(std_input) && !apply_tty_stdin)
262                 return EXEC_INPUT_NULL;
263
264         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
265                 return EXEC_INPUT_NULL;
266
267         return std_input;
268 }
269
270 static int fixup_output(ExecOutput std_output, int socket_fd) {
271
272         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
273                 return EXEC_OUTPUT_INHERIT;
274
275         return std_output;
276 }
277
278 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
279         ExecInput i;
280
281         assert(context);
282
283         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
284
285         switch (i) {
286
287         case EXEC_INPUT_NULL:
288                 return open_null_as(O_RDONLY, STDIN_FILENO);
289
290         case EXEC_INPUT_TTY:
291         case EXEC_INPUT_TTY_FORCE:
292         case EXEC_INPUT_TTY_FAIL: {
293                 int fd, r;
294
295                 if ((fd = acquire_terminal(
296                                      tty_path(context),
297                                      i == EXEC_INPUT_TTY_FAIL,
298                                      i == EXEC_INPUT_TTY_FORCE,
299                                      false,
300                                      (usec_t) -1)) < 0)
301                         return fd;
302
303                 if (fd != STDIN_FILENO) {
304                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
305                         close_nointr_nofail(fd);
306                 } else
307                         r = STDIN_FILENO;
308
309                 return r;
310         }
311
312         case EXEC_INPUT_SOCKET:
313                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
314
315         default:
316                 assert_not_reached("Unknown input type");
317         }
318 }
319
320 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
321         ExecOutput o;
322         ExecInput i;
323
324         assert(context);
325         assert(ident);
326
327         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
328         o = fixup_output(context->std_output, socket_fd);
329
330         /* This expects the input is already set up */
331
332         switch (o) {
333
334         case EXEC_OUTPUT_INHERIT:
335
336                 /* If input got downgraded, inherit the original value */
337                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
338                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
339
340                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
341                 if (i != EXEC_INPUT_NULL)
342                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
343
344                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
345                 if (getppid() != 1)
346                         return STDOUT_FILENO;
347
348                 /* We need to open /dev/null here anew, to get the
349                  * right access mode. So we fall through */
350
351         case EXEC_OUTPUT_NULL:
352                 return open_null_as(O_WRONLY, STDOUT_FILENO);
353
354         case EXEC_OUTPUT_TTY:
355                 if (is_terminal_input(i))
356                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
357
358                 /* We don't reset the terminal if this is just about output */
359                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
360
361         case EXEC_OUTPUT_SYSLOG:
362         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
363         case EXEC_OUTPUT_KMSG:
364         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
365         case EXEC_OUTPUT_JOURNAL:
366         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
367                 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
368
369         case EXEC_OUTPUT_SOCKET:
370                 assert(socket_fd >= 0);
371                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
372
373         default:
374                 assert_not_reached("Unknown output type");
375         }
376 }
377
378 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
379         ExecOutput o, e;
380         ExecInput i;
381
382         assert(context);
383         assert(ident);
384
385         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
386         o = fixup_output(context->std_output, socket_fd);
387         e = fixup_output(context->std_error, socket_fd);
388
389         /* This expects the input and output are already set up */
390
391         /* Don't change the stderr file descriptor if we inherit all
392          * the way and are not on a tty */
393         if (e == EXEC_OUTPUT_INHERIT &&
394             o == EXEC_OUTPUT_INHERIT &&
395             i == EXEC_INPUT_NULL &&
396             !is_terminal_input(context->std_input) &&
397             getppid () != 1)
398                 return STDERR_FILENO;
399
400         /* Duplicate from stdout if possible */
401         if (e == o || e == EXEC_OUTPUT_INHERIT)
402                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
403
404         switch (e) {
405
406         case EXEC_OUTPUT_NULL:
407                 return open_null_as(O_WRONLY, STDERR_FILENO);
408
409         case EXEC_OUTPUT_TTY:
410                 if (is_terminal_input(i))
411                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
412
413                 /* We don't reset the terminal if this is just about output */
414                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
415
416         case EXEC_OUTPUT_SYSLOG:
417         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
418         case EXEC_OUTPUT_KMSG:
419         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
420         case EXEC_OUTPUT_JOURNAL:
421         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
422                 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
423
424         case EXEC_OUTPUT_SOCKET:
425                 assert(socket_fd >= 0);
426                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
427
428         default:
429                 assert_not_reached("Unknown error type");
430         }
431 }
432
433 static int chown_terminal(int fd, uid_t uid) {
434         struct stat st;
435
436         assert(fd >= 0);
437
438         /* This might fail. What matters are the results. */
439         (void) fchown(fd, uid, -1);
440         (void) fchmod(fd, TTY_MODE);
441
442         if (fstat(fd, &st) < 0)
443                 return -errno;
444
445         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
446                 return -EPERM;
447
448         return 0;
449 }
450
451 static int setup_confirm_stdio(int *_saved_stdin,
452                                int *_saved_stdout) {
453         int fd = -1, saved_stdin, saved_stdout = -1, r;
454
455         assert(_saved_stdin);
456         assert(_saved_stdout);
457
458         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
459         if (saved_stdin < 0)
460                 return -errno;
461
462         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
463         if (saved_stdout < 0) {
464                 r = errno;
465                 goto fail;
466         }
467
468         fd = acquire_terminal(
469                         "/dev/console",
470                         false,
471                         false,
472                         false,
473                         DEFAULT_CONFIRM_USEC);
474         if (fd < 0) {
475                 r = fd;
476                 goto fail;
477         }
478
479         r = chown_terminal(fd, getuid());
480         if (r < 0)
481                 goto fail;
482
483         if (dup2(fd, STDIN_FILENO) < 0) {
484                 r = -errno;
485                 goto fail;
486         }
487
488         if (dup2(fd, STDOUT_FILENO) < 0) {
489                 r = -errno;
490                 goto fail;
491         }
492
493         if (fd >= 2)
494                 close_nointr_nofail(fd);
495
496         *_saved_stdin = saved_stdin;
497         *_saved_stdout = saved_stdout;
498
499         return 0;
500
501 fail:
502         if (saved_stdout >= 0)
503                 close_nointr_nofail(saved_stdout);
504
505         if (saved_stdin >= 0)
506                 close_nointr_nofail(saved_stdin);
507
508         if (fd >= 0)
509                 close_nointr_nofail(fd);
510
511         return r;
512 }
513
514 static int write_confirm_message(const char *format, ...) {
515         int fd;
516         va_list ap;
517
518         assert(format);
519
520         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
521         if (fd < 0)
522                 return fd;
523
524         va_start(ap, format);
525         vdprintf(fd, format, ap);
526         va_end(ap);
527
528         close_nointr_nofail(fd);
529
530         return 0;
531 }
532
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back onto stdin/stdout and closes the saved copies.
 * Negative saved values are skipped.
 *
 * Returns 0 on success, or -errno of the last dup2() that failed. The
 * saved descriptors are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int in_fd, out_fd, result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                result = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                result = -errno;

        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return result;
}
559
/* Asks on the console whether the command line given by argv shall be
 * executed. The answer character (one of "yns") is stored in
 * *response by ask().
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored on every path after the redirection
 * succeeded, including the out-of-memory path.
 *
 * Returns the result of ask(), or a negative errno-style value if the
 * console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdio pointing at the console and don't
                 * leak the saved descriptors */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
579
580 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
581         bool keep_groups = false;
582         int r;
583
584         assert(context);
585
586         /* Lookup and set GID and supplementary group list. Here too
587          * we avoid NSS lookups for gid=0. */
588
589         if (context->group || username) {
590
591                 if (context->group) {
592                         const char *g = context->group;
593
594                         if ((r = get_group_creds(&g, &gid)) < 0)
595                                 return r;
596                 }
597
598                 /* First step, initialize groups from /etc/groups */
599                 if (username && gid != 0) {
600                         if (initgroups(username, gid) < 0)
601                                 return -errno;
602
603                         keep_groups = true;
604                 }
605
606                 /* Second step, set our gids */
607                 if (setresgid(gid, gid, gid) < 0)
608                         return -errno;
609         }
610
611         if (context->supplementary_groups) {
612                 int ngroups_max, k;
613                 gid_t *gids;
614                 char **i;
615
616                 /* Final step, initialize any manually set supplementary groups */
617                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
618
619                 if (!(gids = new(gid_t, ngroups_max)))
620                         return -ENOMEM;
621
622                 if (keep_groups) {
623                         if ((k = getgroups(ngroups_max, gids)) < 0) {
624                                 free(gids);
625                                 return -errno;
626                         }
627                 } else
628                         k = 0;
629
630                 STRV_FOREACH(i, context->supplementary_groups) {
631                         const char *g;
632
633                         if (k >= ngroups_max) {
634                                 free(gids);
635                                 return -E2BIG;
636                         }
637
638                         g = *i;
639                         r = get_group_creds(&g, gids+k);
640                         if (r < 0) {
641                                 free(gids);
642                                 return r;
643                         }
644
645                         k++;
646                 }
647
648                 if (setgroups(k, gids) < 0) {
649                         free(gids);
650                         return -errno;
651                 }
652
653                 free(gids);
654         }
655
656         return 0;
657 }
658
659 static int enforce_user(const ExecContext *context, uid_t uid) {
660         int r;
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 cap_t d;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, whiel we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 if (!(d = cap_dup(context->capabilities)))
688                         return -errno;
689
690                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
691                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
692                         r = -errno;
693                         cap_free(d);
694                         return r;
695                 }
696
697                 if (cap_set_proc(d) < 0) {
698                         r = -errno;
699                         cap_free(d);
700                         return r;
701                 }
702
703                 cap_free(d);
704         }
705
706         /* Third step: actually set the uids */
707         if (setresuid(uid, uid, uid) < 0)
708                 return -errno;
709
710         /* At this point we should have all necessary capabilities but
711            are otherwise a normal user. However, the caps might got
712            corrupted due to the setresuid() so we need clean them up
713            later. This is done outside of this call. */
714
715         return 0;
716 }
717
718 #ifdef HAVE_PAM
719
720 static int null_conv(
721                 int num_msg,
722                 const struct pam_message **msg,
723                 struct pam_response **resp,
724                 void *appdata_ptr) {
725
726         /* We don't support conversations */
727
728         return PAM_CONV_ERR;
729 }
730
731 static int setup_pam(
732                 const char *name,
733                 const char *user,
734                 uid_t uid,
735                 const char *tty,
736                 char ***pam_env,
737                 int fds[], unsigned n_fds) {
738
739         static const struct pam_conv conv = {
740                 .conv = null_conv,
741                 .appdata_ptr = NULL
742         };
743
744         pam_handle_t *handle = NULL;
745         sigset_t ss, old_ss;
746         int pam_code = PAM_SUCCESS;
747         int err;
748         char **e = NULL;
749         bool close_session = false;
750         pid_t pam_pid = 0, parent_pid;
751
752         assert(name);
753         assert(user);
754         assert(pam_env);
755
756         /* We set up PAM in the parent process, then fork. The child
757          * will then stay around until killed via PR_GET_PDEATHSIG or
758          * systemd via the cgroup logic. It will then remove the PAM
759          * session again. The parent process will exec() the actual
760          * daemon. We do things this way to ensure that the main PID
761          * of the daemon is the one we initially fork()ed. */
762
763         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
764                 handle = NULL;
765                 goto fail;
766         }
767
768         if (tty)
769                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
770                         goto fail;
771
772         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
773                 goto fail;
774
775         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
776                 goto fail;
777
778         close_session = true;
779
780         if ((!(e = pam_getenvlist(handle)))) {
781                 pam_code = PAM_BUF_ERR;
782                 goto fail;
783         }
784
785         /* Block SIGTERM, so that we know that it won't get lost in
786          * the child */
787         if (sigemptyset(&ss) < 0 ||
788             sigaddset(&ss, SIGTERM) < 0 ||
789             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
790                 goto fail;
791
792         parent_pid = getpid();
793
794         if ((pam_pid = fork()) < 0)
795                 goto fail;
796
797         if (pam_pid == 0) {
798                 int sig;
799                 int r = EXIT_PAM;
800
801                 /* The child's job is to reset the PAM session on
802                  * termination */
803
804                 /* This string must fit in 10 chars (i.e. the length
805                  * of "/sbin/init"), to look pretty in /bin/ps */
806                 rename_process("(sd-pam)");
807
808                 /* Make sure we don't keep open the passed fds in this
809                 child. We assume that otherwise only those fds are
810                 open here that have been opened by PAM. */
811                 close_many(fds, n_fds);
812
813                 /* Drop privileges - we don't need any to pam_close_session
814                  * and this will make PR_SET_PDEATHSIG work in most cases.
815                  * If this fails, ignore the error - but expect sd-pam threads
816                  * to fail to exit normally */
817                 if (setresuid(uid, uid, uid) < 0)
818                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
819
820                 /* Wait until our parent died. This will only work if
821                  * the above setresuid() succeeds, otherwise the kernel
822                  * will not allow unprivileged parents kill their privileged
823                  * children this way. We rely on the control groups kill logic
824                  * to do the rest for us. */
825                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
826                         goto child_finish;
827
828                 /* Check if our parent process might already have
829                  * died? */
830                 if (getppid() == parent_pid) {
831                         for (;;) {
832                                 if (sigwait(&ss, &sig) < 0) {
833                                         if (errno == EINTR)
834                                                 continue;
835
836                                         goto child_finish;
837                                 }
838
839                                 assert(sig == SIGTERM);
840                                 break;
841                         }
842                 }
843
844                 /* If our parent died we'll end the session */
845                 if (getppid() != parent_pid)
846                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
847                                 goto child_finish;
848
849                 r = 0;
850
851         child_finish:
852                 pam_end(handle, pam_code | PAM_DATA_SILENT);
853                 _exit(r);
854         }
855
856         /* If the child was forked off successfully it will do all the
857          * cleanups, so forget about the handle here. */
858         handle = NULL;
859
860         /* Unblock SIGTERM again in the parent */
861         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
862                 goto fail;
863
864         /* We close the log explicitly here, since the PAM modules
865          * might have opened it, but we don't want this fd around. */
866         closelog();
867
868         *pam_env = e;
869         e = NULL;
870
871         return 0;
872
873 fail:
874         if (pam_code != PAM_SUCCESS)
875                 err = -EPERM;  /* PAM errors do not map to errno */
876         else
877                 err = -errno;
878
879         if (handle) {
880                 if (close_session)
881                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
882
883                 pam_end(handle, pam_code | PAM_DATA_SILENT);
884         }
885
886         strv_free(e);
887
888         closelog();
889
890         if (pam_pid > 1) {
891                 kill(pam_pid, SIGTERM);
892                 kill(pam_pid, SIGCONT);
893         }
894
895         return err;
896 }
897 #endif
898
/* Renames the current process to "(NAME)", where NAME is taken from
 * the file name component of "path". At most the last 8 characters of
 * the file name are used, so the result fits into 10 characters (the
 * length of "/sbin/init") and looks pretty in /bin/ps. If there is no
 * file name, the process is renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        enum { TAIL_MAX = 8 };
        char buf[1 + TAIL_MAX + 1 + 1];
        const char *base;
        char *w;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > TAIL_MAX) {
                /* Keep the tail: the end of the name is usually the
                 * more interesting part, since the beginning might
                 * just be "systemd-" */
                base += len - TAIL_MAX;
                len = TAIL_MAX;
        }

        w = buf;
        *w++ = '(';
        memcpy(w, base, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(buf);
}
929
930 static int apply_seccomp(uint32_t *syscall_filter) {
931         static const struct sock_filter header[] = {
932                 VALIDATE_ARCHITECTURE,
933                 EXAMINE_SYSCALL
934         };
935         static const struct sock_filter footer[] = {
936                 _KILL_PROCESS
937         };
938
939         int i;
940         unsigned n;
941         struct sock_filter *f;
942         struct sock_fprog prog;
943
944         assert(syscall_filter);
945
946         /* First: count the syscalls to check for */
947         for (i = 0, n = 0; i < syscall_max(); i++)
948                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
949                         n++;
950
951         /* Second: build the filter program from a header the syscall
952          * matches and the footer */
953         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
954         memcpy(f, header, sizeof(header));
955
956         for (i = 0, n = 0; i < syscall_max(); i++)
957                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
958                         struct sock_filter item[] = {
959                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
960                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
961                         };
962
963                         assert_cc(ELEMENTSOF(item) == 2);
964
965                         f[ELEMENTSOF(header) + 2*n]  = item[0];
966                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
967
968                         n++;
969                 }
970
971         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
972
973         /* Third: install the filter */
974         zero(prog);
975         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
976         prog.filter = f;
977         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
978                 return -errno;
979
980         return 0;
981 }
982
983 int exec_spawn(ExecCommand *command,
984                char **argv,
985                const ExecContext *context,
986                int fds[], unsigned n_fds,
987                char **environment,
988                bool apply_permissions,
989                bool apply_chroot,
990                bool apply_tty_stdin,
991                bool confirm_spawn,
992                CGroupBonding *cgroup_bondings,
993                CGroupAttribute *cgroup_attributes,
994                const char *cgroup_suffix,
995                const char *unit_id,
996                int idle_pipe[2],
997                pid_t *ret) {
998
999         pid_t pid;
1000         int r;
1001         char *line;
1002         int socket_fd;
1003         char _cleanup_strv_free_ **files_env = NULL;
1004
1005         assert(command);
1006         assert(context);
1007         assert(ret);
1008         assert(fds || n_fds <= 0);
1009
1010         if (context->std_input == EXEC_INPUT_SOCKET ||
1011             context->std_output == EXEC_OUTPUT_SOCKET ||
1012             context->std_error == EXEC_OUTPUT_SOCKET) {
1013
1014                 if (n_fds != 1)
1015                         return -EINVAL;
1016
1017                 socket_fd = fds[0];
1018
1019                 fds = NULL;
1020                 n_fds = 0;
1021         } else
1022                 socket_fd = -1;
1023
1024         r = exec_context_load_environment(context, &files_env);
1025         if (r < 0) {
1026                 log_struct(LOG_ERR,
1027                            "UNIT=%s", unit_id,
1028                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1029                            "ERRNO=%d", -r,
1030                            NULL);
1031                 return r;
1032         }
1033
1034         if (!argv)
1035                 argv = command->argv;
1036
1037         line = exec_command_line(argv);
1038         if (!line)
1039                 return log_oom();
1040
1041         log_struct(LOG_DEBUG,
1042                    "UNIT=%s", unit_id,
1043                    "MESSAGE=About to execute %s", line,
1044                    NULL);
1045         free(line);
1046
1047         r = cgroup_bonding_realize_list(cgroup_bondings);
1048         if (r < 0)
1049                 return r;
1050
1051         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1052
1053         pid = fork();
1054         if (pid < 0)
1055                 return -errno;
1056
1057         if (pid == 0) {
1058                 int i, err;
1059                 sigset_t ss;
1060                 const char *username = NULL, *home = NULL;
1061                 uid_t uid = (uid_t) -1;
1062                 gid_t gid = (gid_t) -1;
1063                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1064                         **final_env = NULL, **final_argv = NULL;
1065                 unsigned n_env = 0;
1066                 bool set_access = false;
1067
1068                 /* child */
1069
1070                 rename_process_from_path(command->path);
1071
1072                 /* We reset exactly these signals, since they are the
1073                  * only ones we set to SIG_IGN in the main daemon. All
1074                  * others we leave untouched because we set them to
1075                  * SIG_DFL or a valid handler initially, both of which
1076                  * will be demoted to SIG_DFL. */
1077                 default_signals(SIGNALS_CRASH_HANDLER,
1078                                 SIGNALS_IGNORE, -1);
1079
1080                 if (context->ignore_sigpipe)
1081                         ignore_signals(SIGPIPE, -1);
1082
1083                 assert_se(sigemptyset(&ss) == 0);
1084                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1085                         err = -errno;
1086                         r = EXIT_SIGNAL_MASK;
1087                         goto fail_child;
1088                 }
1089
1090                 if (idle_pipe) {
1091                         if (idle_pipe[1] >= 0)
1092                                 close_nointr_nofail(idle_pipe[1]);
1093                         if (idle_pipe[0] >= 0) {
1094                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1095                                 close_nointr_nofail(idle_pipe[0]);
1096                         }
1097                 }
1098
1099                 /* Close sockets very early to make sure we don't
1100                  * block init reexecution because it cannot bind its
1101                  * sockets */
1102                 log_forget_fds();
1103                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1104                                            socket_fd >= 0 ? 1 : n_fds);
1105                 if (err < 0) {
1106                         r = EXIT_FDS;
1107                         goto fail_child;
1108                 }
1109
1110                 if (!context->same_pgrp)
1111                         if (setsid() < 0) {
1112                                 err = -errno;
1113                                 r = EXIT_SETSID;
1114                                 goto fail_child;
1115                         }
1116
1117                 if (context->tcpwrap_name) {
1118                         if (socket_fd >= 0)
1119                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1120                                         err = -EACCES;
1121                                         r = EXIT_TCPWRAP;
1122                                         goto fail_child;
1123                                 }
1124
1125                         for (i = 0; i < (int) n_fds; i++) {
1126                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1127                                         err = -EACCES;
1128                                         r = EXIT_TCPWRAP;
1129                                         goto fail_child;
1130                                 }
1131                         }
1132                 }
1133
1134                 exec_context_tty_reset(context);
1135
1136                 if (confirm_spawn) {
1137                         char response;
1138
1139                         err = ask_for_confirmation(&response, argv);
1140                         if (err == -ETIMEDOUT)
1141                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1142                         else if (err < 0)
1143                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1144                         else if (response == 's') {
1145                                 write_confirm_message("Skipping execution.\n");
1146                                 err = -ECANCELED;
1147                                 r = EXIT_CONFIRM;
1148                                 goto fail_child;
1149                         } else if (response == 'n') {
1150                                 write_confirm_message("Failing execution.\n");
1151                                 err = r = 0;
1152                                 goto fail_child;
1153                         }
1154                 }
1155
1156                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1157                  * must sure to drop O_NONBLOCK */
1158                 if (socket_fd >= 0)
1159                         fd_nonblock(socket_fd, false);
1160
1161                 err = setup_input(context, socket_fd, apply_tty_stdin);
1162                 if (err < 0) {
1163                         r = EXIT_STDIN;
1164                         goto fail_child;
1165                 }
1166
1167                 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1168                 if (err < 0) {
1169                         r = EXIT_STDOUT;
1170                         goto fail_child;
1171                 }
1172
1173                 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1174                 if (err < 0) {
1175                         r = EXIT_STDERR;
1176                         goto fail_child;
1177                 }
1178
1179                 if (cgroup_bondings) {
1180                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1181                         if (err < 0) {
1182                                 r = EXIT_CGROUP;
1183                                 goto fail_child;
1184                         }
1185                 }
1186
1187                 if (context->oom_score_adjust_set) {
1188                         char t[16];
1189
1190                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1191                         char_array_0(t);
1192
1193                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1194                                 err = -errno;
1195                                 r = EXIT_OOM_ADJUST;
1196                                 goto fail_child;
1197                         }
1198                 }
1199
1200                 if (context->nice_set)
1201                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1202                                 err = -errno;
1203                                 r = EXIT_NICE;
1204                                 goto fail_child;
1205                         }
1206
1207                 if (context->cpu_sched_set) {
1208                         struct sched_param param;
1209
1210                         zero(param);
1211                         param.sched_priority = context->cpu_sched_priority;
1212
1213                         if (sched_setscheduler(0, context->cpu_sched_policy |
1214                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1215                                 err = -errno;
1216                                 r = EXIT_SETSCHEDULER;
1217                                 goto fail_child;
1218                         }
1219                 }
1220
1221                 if (context->cpuset)
1222                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1223                                 err = -errno;
1224                                 r = EXIT_CPUAFFINITY;
1225                                 goto fail_child;
1226                         }
1227
1228                 if (context->ioprio_set)
1229                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1230                                 err = -errno;
1231                                 r = EXIT_IOPRIO;
1232                                 goto fail_child;
1233                         }
1234
1235                 if (context->timer_slack_nsec != (nsec_t) -1)
1236                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1237                                 err = -errno;
1238                                 r = EXIT_TIMERSLACK;
1239                                 goto fail_child;
1240                         }
1241
1242                 if (context->utmp_id)
1243                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1244
1245                 if (context->user) {
1246                         username = context->user;
1247                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1248                         if (err < 0) {
1249                                 r = EXIT_USER;
1250                                 goto fail_child;
1251                         }
1252
1253                         if (is_terminal_input(context->std_input)) {
1254                                 err = chown_terminal(STDIN_FILENO, uid);
1255                                 if (err < 0) {
1256                                         r = EXIT_STDIN;
1257                                         goto fail_child;
1258                                 }
1259                         }
1260
1261                         if (cgroup_bondings && context->control_group_modify) {
1262                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1263                                 if (err >= 0)
1264                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1265                                 if (err < 0) {
1266                                         r = EXIT_CGROUP;
1267                                         goto fail_child;
1268                                 }
1269
1270                                 set_access = true;
1271                         }
1272                 }
1273
1274                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1275                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1276                         if (err < 0) {
1277                                 r = EXIT_CGROUP;
1278                                 goto fail_child;
1279                         }
1280                 }
1281
1282                 if (apply_permissions) {
1283                         err = enforce_groups(context, username, gid);
1284                         if (err < 0) {
1285                                 r = EXIT_GROUP;
1286                                 goto fail_child;
1287                         }
1288                 }
1289
1290                 umask(context->umask);
1291
1292 #ifdef HAVE_PAM
1293                 if (apply_permissions && context->pam_name && username) {
1294                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1295                         if (err < 0) {
1296                                 r = EXIT_PAM;
1297                                 goto fail_child;
1298                         }
1299                 }
1300 #endif
1301                 if (context->private_network) {
1302                         if (unshare(CLONE_NEWNET) < 0) {
1303                                 err = -errno;
1304                                 r = EXIT_NETWORK;
1305                                 goto fail_child;
1306                         }
1307
1308                         loopback_setup();
1309                 }
1310
1311                 if (strv_length(context->read_write_dirs) > 0 ||
1312                     strv_length(context->read_only_dirs) > 0 ||
1313                     strv_length(context->inaccessible_dirs) > 0 ||
1314                     context->mount_flags != 0 ||
1315                     context->private_tmp) {
1316                         err = setup_namespace(context->read_write_dirs,
1317                                               context->read_only_dirs,
1318                                               context->inaccessible_dirs,
1319                                               context->private_tmp,
1320                                               context->mount_flags);
1321                         if (err < 0) {
1322                                 r = EXIT_NAMESPACE;
1323                                 goto fail_child;
1324                         }
1325                 }
1326
1327                 if (apply_chroot) {
1328                         if (context->root_directory)
1329                                 if (chroot(context->root_directory) < 0) {
1330                                         err = -errno;
1331                                         r = EXIT_CHROOT;
1332                                         goto fail_child;
1333                                 }
1334
1335                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1336                                 err = -errno;
1337                                 r = EXIT_CHDIR;
1338                                 goto fail_child;
1339                         }
1340                 } else {
1341                         char _cleanup_free_ *d = NULL;
1342
1343                         if (asprintf(&d, "%s/%s",
1344                                      context->root_directory ? context->root_directory : "",
1345                                      context->working_directory ? context->working_directory : "") < 0) {
1346                                 err = -ENOMEM;
1347                                 r = EXIT_MEMORY;
1348                                 goto fail_child;
1349                         }
1350
1351                         if (chdir(d) < 0) {
1352                                 err = -errno;
1353                                 r = EXIT_CHDIR;
1354                                 goto fail_child;
1355                         }
1356                 }
1357
1358                 /* We repeat the fd closing here, to make sure that
1359                  * nothing is leaked from the PAM modules */
1360                 err = close_all_fds(fds, n_fds);
1361                 if (err >= 0)
1362                         err = shift_fds(fds, n_fds);
1363                 if (err >= 0)
1364                         err = flags_fds(fds, n_fds, context->non_blocking);
1365                 if (err < 0) {
1366                         r = EXIT_FDS;
1367                         goto fail_child;
1368                 }
1369
1370                 if (apply_permissions) {
1371
1372                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1373                                 if (!context->rlimit[i])
1374                                         continue;
1375
1376                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1377                                         err = -errno;
1378                                         r = EXIT_LIMITS;
1379                                         goto fail_child;
1380                                 }
1381                         }
1382
1383                         if (context->capability_bounding_set_drop) {
1384                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1385                                 if (err < 0) {
1386                                         r = EXIT_CAPABILITIES;
1387                                         goto fail_child;
1388                                 }
1389                         }
1390
1391                         if (context->user) {
1392                                 err = enforce_user(context, uid);
1393                                 if (err < 0) {
1394                                         r = EXIT_USER;
1395                                         goto fail_child;
1396                                 }
1397                         }
1398
1399                         /* PR_GET_SECUREBITS is not privileged, while
1400                          * PR_SET_SECUREBITS is. So to suppress
1401                          * potential EPERMs we'll try not to call
1402                          * PR_SET_SECUREBITS unless necessary. */
1403                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1404                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1405                                         err = -errno;
1406                                         r = EXIT_SECUREBITS;
1407                                         goto fail_child;
1408                                 }
1409
1410                         if (context->capabilities)
1411                                 if (cap_set_proc(context->capabilities) < 0) {
1412                                         err = -errno;
1413                                         r = EXIT_CAPABILITIES;
1414                                         goto fail_child;
1415                                 }
1416
1417                         if (context->no_new_privileges)
1418                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1419                                         err = -errno;
1420                                         r = EXIT_NO_NEW_PRIVILEGES;
1421                                         goto fail_child;
1422                                 }
1423
1424                         if (context->syscall_filter) {
1425                                 err = apply_seccomp(context->syscall_filter);
1426                                 if (err < 0) {
1427                                         r = EXIT_SECCOMP;
1428                                         goto fail_child;
1429                                 }
1430                         }
1431                 }
1432
1433                 if (!(our_env = new0(char*, 7))) {
1434                         err = -ENOMEM;
1435                         r = EXIT_MEMORY;
1436                         goto fail_child;
1437                 }
1438
1439                 if (n_fds > 0)
1440                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1441                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1442                                 err = -ENOMEM;
1443                                 r = EXIT_MEMORY;
1444                                 goto fail_child;
1445                         }
1446
1447                 if (home)
1448                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1449                                 err = -ENOMEM;
1450                                 r = EXIT_MEMORY;
1451                                 goto fail_child;
1452                         }
1453
1454                 if (username)
1455                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1456                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1457                                 err = -ENOMEM;
1458                                 r = EXIT_MEMORY;
1459                                 goto fail_child;
1460                         }
1461
1462                 if (is_terminal_input(context->std_input) ||
1463                     context->std_output == EXEC_OUTPUT_TTY ||
1464                     context->std_error == EXEC_OUTPUT_TTY)
1465                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1466                                 err = -ENOMEM;
1467                                 r = EXIT_MEMORY;
1468                                 goto fail_child;
1469                         }
1470
1471                 assert(n_env <= 7);
1472
1473                 if (!(final_env = strv_env_merge(
1474                                       5,
1475                                       environment,
1476                                       our_env,
1477                                       context->environment,
1478                                       files_env,
1479                                       pam_env,
1480                                       NULL))) {
1481                         err = -ENOMEM;
1482                         r = EXIT_MEMORY;
1483                         goto fail_child;
1484                 }
1485
1486                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1487                         err = -ENOMEM;
1488                         r = EXIT_MEMORY;
1489                         goto fail_child;
1490                 }
1491
1492                 final_env = strv_env_clean(final_env);
1493
1494                 execve(command->path, final_argv, final_env);
1495                 err = -errno;
1496                 r = EXIT_EXEC;
1497
1498         fail_child:
1499                 if (r != 0) {
1500                         log_open();
1501                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1502                                    "EXECUTABLE=%s", command->path,
1503                                    "MESSAGE=Failed at step %s spawning %s: %s",
1504                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1505                                           command->path, strerror(-err),
1506                                    "ERRNO=%d", -err,
1507                                    NULL);
1508                         log_close();
1509                 }
1510
1511                 _exit(r);
1512         }
1513
1514         log_struct(LOG_DEBUG,
1515                    "UNIT=%s", unit_id,
1516                    "MESSAGE=Forked %s as %lu",
1517                           command->path, (unsigned long) pid,
1518                    NULL);
1519
1520         /* We add the new process to the cgroup both in the child (so
1521          * that we can be sure that no user code is ever executed
1522          * outside of the cgroup) and in the parent (so that we can be
1523          * sure that when we kill the cgroup the process will be
1524          * killed too). */
1525         if (cgroup_bondings)
1526                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1527
1528         exec_status_start(&command->exec_status, pid);
1529
1530         *ret = pid;
1531         return 0;
1532 }
1533
1534 void exec_context_init(ExecContext *c) {
1535         assert(c);
1536
1537         c->umask = 0022;
1538         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1539         c->cpu_sched_policy = SCHED_OTHER;
1540         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1541         c->syslog_level_prefix = true;
1542         c->control_group_persistent = -1;
1543         c->ignore_sigpipe = true;
1544         c->timer_slack_nsec = (nsec_t) -1;
1545 }
1546
1547 void exec_context_done(ExecContext *c) {
1548         unsigned l;
1549
1550         assert(c);
1551
1552         strv_free(c->environment);
1553         c->environment = NULL;
1554
1555         strv_free(c->environment_files);
1556         c->environment_files = NULL;
1557
1558         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1559                 free(c->rlimit[l]);
1560                 c->rlimit[l] = NULL;
1561         }
1562
1563         free(c->working_directory);
1564         c->working_directory = NULL;
1565         free(c->root_directory);
1566         c->root_directory = NULL;
1567
1568         free(c->tty_path);
1569         c->tty_path = NULL;
1570
1571         free(c->tcpwrap_name);
1572         c->tcpwrap_name = NULL;
1573
1574         free(c->syslog_identifier);
1575         c->syslog_identifier = NULL;
1576
1577         free(c->user);
1578         c->user = NULL;
1579
1580         free(c->group);
1581         c->group = NULL;
1582
1583         strv_free(c->supplementary_groups);
1584         c->supplementary_groups = NULL;
1585
1586         free(c->pam_name);
1587         c->pam_name = NULL;
1588
1589         if (c->capabilities) {
1590                 cap_free(c->capabilities);
1591                 c->capabilities = NULL;
1592         }
1593
1594         strv_free(c->read_only_dirs);
1595         c->read_only_dirs = NULL;
1596
1597         strv_free(c->read_write_dirs);
1598         c->read_write_dirs = NULL;
1599
1600         strv_free(c->inaccessible_dirs);
1601         c->inaccessible_dirs = NULL;
1602
1603         if (c->cpuset)
1604                 CPU_FREE(c->cpuset);
1605
1606         free(c->utmp_id);
1607         c->utmp_id = NULL;
1608
1609         free(c->syscall_filter);
1610         c->syscall_filter = NULL;
1611 }
1612
1613 void exec_command_done(ExecCommand *c) {
1614         assert(c);
1615
1616         free(c->path);
1617         c->path = NULL;
1618
1619         strv_free(c->argv);
1620         c->argv = NULL;
1621 }
1622
1623 void exec_command_done_array(ExecCommand *c, unsigned n) {
1624         unsigned i;
1625
1626         for (i = 0; i < n; i++)
1627                 exec_command_done(c+i);
1628 }
1629
1630 void exec_command_free_list(ExecCommand *c) {
1631         ExecCommand *i;
1632
1633         while ((i = c)) {
1634                 LIST_REMOVE(ExecCommand, command, c, i);
1635                 exec_command_done(i);
1636                 free(i);
1637         }
1638 }
1639
1640 void exec_command_free_array(ExecCommand **c, unsigned n) {
1641         unsigned i;
1642
1643         for (i = 0; i < n; i++) {
1644                 exec_command_free_list(c[i]);
1645                 c[i] = NULL;
1646         }
1647 }
1648
1649 int exec_context_load_environment(const ExecContext *c, char ***l) {
1650         char **i, **r = NULL;
1651
1652         assert(c);
1653         assert(l);
1654
1655         STRV_FOREACH(i, c->environment_files) {
1656                 char *fn;
1657                 int k;
1658                 bool ignore = false;
1659                 char **p;
1660
1661                 fn = *i;
1662
1663                 if (fn[0] == '-') {
1664                         ignore = true;
1665                         fn ++;
1666                 }
1667
1668                 if (!path_is_absolute(fn)) {
1669
1670                         if (ignore)
1671                                 continue;
1672
1673                         strv_free(r);
1674                         return -EINVAL;
1675                 }
1676
1677                 if ((k = load_env_file(fn, &p)) < 0) {
1678
1679                         if (ignore)
1680                                 continue;
1681
1682                         strv_free(r);
1683                         return k;
1684                 }
1685
1686                 if (r == NULL)
1687                         r = p;
1688                 else {
1689                         char **m;
1690
1691                         m = strv_env_merge(2, r, p);
1692                         strv_free(r);
1693                         strv_free(p);
1694
1695                         if (!m)
1696                                 return -ENOMEM;
1697
1698                         r = m;
1699                 }
1700         }
1701
1702         *l = r;
1703
1704         return 0;
1705 }
1706
1707 static void strv_fprintf(FILE *f, char **l) {
1708         char **g;
1709
1710         assert(f);
1711
1712         STRV_FOREACH(g, l)
1713                 fprintf(f, " %s", *g);
1714 }
1715
1716 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1717         char ** e;
1718         unsigned i;
1719
1720         assert(c);
1721         assert(f);
1722
1723         if (!prefix)
1724                 prefix = "";
1725
1726         fprintf(f,
1727                 "%sUMask: %04o\n"
1728                 "%sWorkingDirectory: %s\n"
1729                 "%sRootDirectory: %s\n"
1730                 "%sNonBlocking: %s\n"
1731                 "%sPrivateTmp: %s\n"
1732                 "%sControlGroupModify: %s\n"
1733                 "%sControlGroupPersistent: %s\n"
1734                 "%sPrivateNetwork: %s\n"
1735                 "%sIgnoreSIGPIPE: %s\n",
1736                 prefix, c->umask,
1737                 prefix, c->working_directory ? c->working_directory : "/",
1738                 prefix, c->root_directory ? c->root_directory : "/",
1739                 prefix, yes_no(c->non_blocking),
1740                 prefix, yes_no(c->private_tmp),
1741                 prefix, yes_no(c->control_group_modify),
1742                 prefix, yes_no(c->control_group_persistent),
1743                 prefix, yes_no(c->private_network),
1744                 prefix, yes_no(c->ignore_sigpipe));
1745
1746         STRV_FOREACH(e, c->environment)
1747                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1748
1749         STRV_FOREACH(e, c->environment_files)
1750                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1751
1752         if (c->tcpwrap_name)
1753                 fprintf(f,
1754                         "%sTCPWrapName: %s\n",
1755                         prefix, c->tcpwrap_name);
1756
1757         if (c->nice_set)
1758                 fprintf(f,
1759                         "%sNice: %i\n",
1760                         prefix, c->nice);
1761
1762         if (c->oom_score_adjust_set)
1763                 fprintf(f,
1764                         "%sOOMScoreAdjust: %i\n",
1765                         prefix, c->oom_score_adjust);
1766
1767         for (i = 0; i < RLIM_NLIMITS; i++)
1768                 if (c->rlimit[i])
1769                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1770
1771         if (c->ioprio_set) {
1772                 char *class_str;
1773                 int r;
1774
1775                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1776                 if (r < 0)
1777                         class_str = NULL;
1778                 fprintf(f,
1779                         "%sIOSchedulingClass: %s\n"
1780                         "%sIOPriority: %i\n",
1781                         prefix, strna(class_str),
1782                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1783                 free(class_str);
1784         }
1785
1786         if (c->cpu_sched_set) {
1787                 char *policy_str;
1788                 int r;
1789
1790                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1791                 if (r < 0)
1792                         policy_str = NULL;
1793                 fprintf(f,
1794                         "%sCPUSchedulingPolicy: %s\n"
1795                         "%sCPUSchedulingPriority: %i\n"
1796                         "%sCPUSchedulingResetOnFork: %s\n",
1797                         prefix, strna(policy_str),
1798                         prefix, c->cpu_sched_priority,
1799                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1800                 free(policy_str);
1801         }
1802
1803         if (c->cpuset) {
1804                 fprintf(f, "%sCPUAffinity:", prefix);
1805                 for (i = 0; i < c->cpuset_ncpus; i++)
1806                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1807                                 fprintf(f, " %i", i);
1808                 fputs("\n", f);
1809         }
1810
1811         if (c->timer_slack_nsec != (nsec_t) -1)
1812                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1813
1814         fprintf(f,
1815                 "%sStandardInput: %s\n"
1816                 "%sStandardOutput: %s\n"
1817                 "%sStandardError: %s\n",
1818                 prefix, exec_input_to_string(c->std_input),
1819                 prefix, exec_output_to_string(c->std_output),
1820                 prefix, exec_output_to_string(c->std_error));
1821
1822         if (c->tty_path)
1823                 fprintf(f,
1824                         "%sTTYPath: %s\n"
1825                         "%sTTYReset: %s\n"
1826                         "%sTTYVHangup: %s\n"
1827                         "%sTTYVTDisallocate: %s\n",
1828                         prefix, c->tty_path,
1829                         prefix, yes_no(c->tty_reset),
1830                         prefix, yes_no(c->tty_vhangup),
1831                         prefix, yes_no(c->tty_vt_disallocate));
1832
1833         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1834             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1835             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1836             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1837                 char *fac_str, *lvl_str;
1838                 int r;
1839
1840                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1841                 if (r < 0)
1842                         fac_str = NULL;
1843
1844                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1845                 if (r < 0)
1846                         lvl_str = NULL;
1847
1848                 fprintf(f,
1849                         "%sSyslogFacility: %s\n"
1850                         "%sSyslogLevel: %s\n",
1851                         prefix, strna(fac_str),
1852                         prefix, strna(lvl_str));
1853                 free(lvl_str);
1854                 free(fac_str);
1855         }
1856
1857         if (c->capabilities) {
1858                 char *t;
1859                 if ((t = cap_to_text(c->capabilities, NULL))) {
1860                         fprintf(f, "%sCapabilities: %s\n",
1861                                 prefix, t);
1862                         cap_free(t);
1863                 }
1864         }
1865
1866         if (c->secure_bits)
1867                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1868                         prefix,
1869                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1870                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1871                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1872                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1873                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1874                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1875
1876         if (c->capability_bounding_set_drop) {
1877                 unsigned long l;
1878                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1879
1880                 for (l = 0; l <= cap_last_cap(); l++)
1881                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1882                                 char *t;
1883
1884                                 if ((t = cap_to_name(l))) {
1885                                         fprintf(f, " %s", t);
1886                                         cap_free(t);
1887                                 }
1888                         }
1889
1890                 fputs("\n", f);
1891         }
1892
1893         if (c->user)
1894                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1895         if (c->group)
1896                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1897
1898         if (strv_length(c->supplementary_groups) > 0) {
1899                 fprintf(f, "%sSupplementaryGroups:", prefix);
1900                 strv_fprintf(f, c->supplementary_groups);
1901                 fputs("\n", f);
1902         }
1903
1904         if (c->pam_name)
1905                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1906
1907         if (strv_length(c->read_write_dirs) > 0) {
1908                 fprintf(f, "%sReadWriteDirs:", prefix);
1909                 strv_fprintf(f, c->read_write_dirs);
1910                 fputs("\n", f);
1911         }
1912
1913         if (strv_length(c->read_only_dirs) > 0) {
1914                 fprintf(f, "%sReadOnlyDirs:", prefix);
1915                 strv_fprintf(f, c->read_only_dirs);
1916                 fputs("\n", f);
1917         }
1918
1919         if (strv_length(c->inaccessible_dirs) > 0) {
1920                 fprintf(f, "%sInaccessibleDirs:", prefix);
1921                 strv_fprintf(f, c->inaccessible_dirs);
1922                 fputs("\n", f);
1923         }
1924
1925         if (c->utmp_id)
1926                 fprintf(f,
1927                         "%sUtmpIdentifier: %s\n",
1928                         prefix, c->utmp_id);
1929 }
1930
1931 void exec_status_start(ExecStatus *s, pid_t pid) {
1932         assert(s);
1933
1934         zero(*s);
1935         s->pid = pid;
1936         dual_timestamp_get(&s->start_timestamp);
1937 }
1938
1939 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1940         assert(s);
1941
1942         if (s->pid && s->pid != pid)
1943                 zero(*s);
1944
1945         s->pid = pid;
1946         dual_timestamp_get(&s->exit_timestamp);
1947
1948         s->code = code;
1949         s->status = status;
1950
1951         if (context) {
1952                 if (context->utmp_id)
1953                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1954
1955                 exec_context_tty_reset(context);
1956         }
1957 }
1958
1959 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1960         char buf[FORMAT_TIMESTAMP_MAX];
1961
1962         assert(s);
1963         assert(f);
1964
1965         if (!prefix)
1966                 prefix = "";
1967
1968         if (s->pid <= 0)
1969                 return;
1970
1971         fprintf(f,
1972                 "%sPID: %lu\n",
1973                 prefix, (unsigned long) s->pid);
1974
1975         if (s->start_timestamp.realtime > 0)
1976                 fprintf(f,
1977                         "%sStart Timestamp: %s\n",
1978                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1979
1980         if (s->exit_timestamp.realtime > 0)
1981                 fprintf(f,
1982                         "%sExit Timestamp: %s\n"
1983                         "%sExit Code: %s\n"
1984                         "%sExit Status: %i\n",
1985                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1986                         prefix, sigchld_code_to_string(s->code),
1987                         prefix, s->status);
1988 }
1989
1990 char *exec_command_line(char **argv) {
1991         size_t k;
1992         char *n, *p, **a;
1993         bool first = true;
1994
1995         assert(argv);
1996
1997         k = 1;
1998         STRV_FOREACH(a, argv)
1999                 k += strlen(*a)+3;
2000
2001         if (!(n = new(char, k)))
2002                 return NULL;
2003
2004         p = n;
2005         STRV_FOREACH(a, argv) {
2006
2007                 if (!first)
2008                         *(p++) = ' ';
2009                 else
2010                         first = false;
2011
2012                 if (strpbrk(*a, WHITESPACE)) {
2013                         *(p++) = '\'';
2014                         p = stpcpy(p, *a);
2015                         *(p++) = '\'';
2016                 } else
2017                         p = stpcpy(p, *a);
2018
2019         }
2020
2021         *p = 0;
2022
2023         /* FIXME: this doesn't really handle arguments that have
2024          * spaces and ticks in them */
2025
2026         return n;
2027 }
2028
2029 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2030         char *p2;
2031         const char *prefix2;
2032
2033         char *cmd;
2034
2035         assert(c);
2036         assert(f);
2037
2038         if (!prefix)
2039                 prefix = "";
2040         p2 = strappend(prefix, "\t");
2041         prefix2 = p2 ? p2 : prefix;
2042
2043         cmd = exec_command_line(c->argv);
2044
2045         fprintf(f,
2046                 "%sCommand Line: %s\n",
2047                 prefix, cmd ? cmd : strerror(ENOMEM));
2048
2049         free(cmd);
2050
2051         exec_status_dump(&c->exec_status, f, prefix2);
2052
2053         free(p2);
2054 }
2055
2056 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2057         assert(f);
2058
2059         if (!prefix)
2060                 prefix = "";
2061
2062         LIST_FOREACH(command, c, c)
2063                 exec_command_dump(c, f, prefix);
2064 }
2065
2066 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2067         ExecCommand *end;
2068
2069         assert(l);
2070         assert(e);
2071
2072         if (*l) {
2073                 /* It's kind of important, that we keep the order here */
2074                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2075                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2076         } else
2077               *l = e;
2078 }
2079
2080 int exec_command_set(ExecCommand *c, const char *path, ...) {
2081         va_list ap;
2082         char **l, *p;
2083
2084         assert(c);
2085         assert(path);
2086
2087         va_start(ap, path);
2088         l = strv_new_ap(path, ap);
2089         va_end(ap);
2090
2091         if (!l)
2092                 return -ENOMEM;
2093
2094         if (!(p = strdup(path))) {
2095                 strv_free(l);
2096                 return -ENOMEM;
2097         }
2098
2099         free(c->path);
2100         c->path = p;
2101
2102         strv_free(c->argv);
2103         c->argv = l;
2104
2105         return 0;
2106 }
2107
2108 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2109         [EXEC_INPUT_NULL] = "null",
2110         [EXEC_INPUT_TTY] = "tty",
2111         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2112         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2113         [EXEC_INPUT_SOCKET] = "socket"
2114 };
2115
2116 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2117
2118 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2119         [EXEC_OUTPUT_INHERIT] = "inherit",
2120         [EXEC_OUTPUT_NULL] = "null",
2121         [EXEC_OUTPUT_TTY] = "tty",
2122         [EXEC_OUTPUT_SYSLOG] = "syslog",
2123         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2124         [EXEC_OUTPUT_KMSG] = "kmsg",
2125         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2126         [EXEC_OUTPUT_JOURNAL] = "journal",
2127         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2128         [EXEC_OUTPUT_SOCKET] = "socket"
2129 };
2130
2131 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);