chiark / gitweb /
bb841b7fcf03115962ab5acb53114384ce8c3d0d
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41
42 #ifdef HAVE_PAM
43 #include <security/pam_appl.h>
44 #endif
45
46 #include "execute.h"
47 #include "strv.h"
48 #include "macro.h"
49 #include "capability.h"
50 #include "util.h"
51 #include "log.h"
52 #include "ioprio.h"
53 #include "securebits.h"
54 #include "cgroup.h"
55 #include "namespace.h"
56 #include "tcpwrap.h"
57 #include "exit-status.h"
58 #include "missing.h"
59 #include "utmp-wtmp.h"
60 #include "def.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
63
64 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
65
66 /* This assumes there is a 'tty' group */
67 #define TTY_MODE 0620
68
/* Relocates the passed descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. they directly follow stdin, stdout and
 * stderr. The array is rewritten in place with the new numbers.
 *
 * Returns 0 on success, or a negative errno-style value if
 * fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        /* Careful: the fds array is modified by this call! */

        assert(fds);

        /* Keep sweeping until a whole pass placed every descriptor
         * exactly where it belongs. */
        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* This one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we asked for was still taken, so
                         * we got some other number. Remember the
                         * first index where that happened and start
                         * the next sweep there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
112
/* Applies the requested O_NONBLOCK setting to every descriptor in fds
 * and clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative value returned by
 * fd_nonblock() or fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        while (idx < n_fds) {
                int r;

                r = fd_nonblock(fds[idx], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors
                 * are meant to be handed over to our children. */
                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;

                idx++;
        }

        return 0;
}
139
140 static const char *tty_path(const ExecContext *context) {
141         assert(context);
142
143         if (context->tty_path)
144                 return context->tty_path;
145
146         return "/dev/console";
147 }
148
149 void exec_context_tty_reset(const ExecContext *context) {
150         assert(context);
151
152         if (context->tty_vhangup)
153                 terminal_vhangup(tty_path(context));
154
155         if (context->tty_reset)
156                 reset_terminal(tty_path(context));
157
158         if (context->tty_vt_disallocate && context->tty_path)
159                 vt_disallocate(context->tty_path);
160 }
161
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and makes
 * it available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno-style value otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the number we wanted */
        if (null_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(null_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(null_fd);

        return result;
}
178
179 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
180         int fd, r;
181         union sockaddr_union sa;
182
183         assert(context);
184         assert(output < _EXEC_OUTPUT_MAX);
185         assert(ident);
186         assert(nfd >= 0);
187
188         fd = socket(AF_UNIX, SOCK_STREAM, 0);
189         if (fd < 0)
190                 return -errno;
191
192         zero(sa);
193         sa.un.sun_family = AF_UNIX;
194         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
195
196         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
197         if (r < 0) {
198                 close_nointr_nofail(fd);
199                 return -errno;
200         }
201
202         if (shutdown(fd, SHUT_RD) < 0) {
203                 close_nointr_nofail(fd);
204                 return -errno;
205         }
206
207         dprintf(fd,
208                 "%s\n"
209                 "%i\n"
210                 "%i\n"
211                 "%i\n"
212                 "%i\n"
213                 "%i\n",
214                 context->syslog_identifier ? context->syslog_identifier : ident,
215                 context->syslog_priority,
216                 !!context->syslog_level_prefix,
217                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
218                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
219                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
220
221         if (fd != nfd) {
222                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
223                 close_nointr_nofail(fd);
224         } else
225                 r = nfd;
226
227         return r;
228 }
/* Opens the terminal at 'path' via open_terminal() with 'mode' plus
 * O_NOCTTY and makes it available as descriptor nfd.
 *
 * Returns nfd on success; otherwise the negative value from
 * open_terminal() or a negative errno-style value from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already got the number we wanted */
        if (tty_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(tty_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(tty_fd);

        return result;
}
246
247 static bool is_terminal_input(ExecInput i) {
248         return
249                 i == EXEC_INPUT_TTY ||
250                 i == EXEC_INPUT_TTY_FORCE ||
251                 i == EXEC_INPUT_TTY_FAIL;
252 }
253
254 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
255
256         if (is_terminal_input(std_input) && !apply_tty_stdin)
257                 return EXEC_INPUT_NULL;
258
259         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
260                 return EXEC_INPUT_NULL;
261
262         return std_input;
263 }
264
265 static int fixup_output(ExecOutput std_output, int socket_fd) {
266
267         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
268                 return EXEC_OUTPUT_INHERIT;
269
270         return std_output;
271 }
272
273 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
274         ExecInput i;
275
276         assert(context);
277
278         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
279
280         switch (i) {
281
282         case EXEC_INPUT_NULL:
283                 return open_null_as(O_RDONLY, STDIN_FILENO);
284
285         case EXEC_INPUT_TTY:
286         case EXEC_INPUT_TTY_FORCE:
287         case EXEC_INPUT_TTY_FAIL: {
288                 int fd, r;
289
290                 if ((fd = acquire_terminal(
291                                      tty_path(context),
292                                      i == EXEC_INPUT_TTY_FAIL,
293                                      i == EXEC_INPUT_TTY_FORCE,
294                                      false)) < 0)
295                         return fd;
296
297                 if (fd != STDIN_FILENO) {
298                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
299                         close_nointr_nofail(fd);
300                 } else
301                         r = STDIN_FILENO;
302
303                 return r;
304         }
305
306         case EXEC_INPUT_SOCKET:
307                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308
309         default:
310                 assert_not_reached("Unknown input type");
311         }
312 }
313
314 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
315         ExecOutput o;
316         ExecInput i;
317
318         assert(context);
319         assert(ident);
320
321         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322         o = fixup_output(context->std_output, socket_fd);
323
324         /* This expects the input is already set up */
325
326         switch (o) {
327
328         case EXEC_OUTPUT_INHERIT:
329
330                 /* If input got downgraded, inherit the original value */
331                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
332                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
333
334                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
335                 if (i != EXEC_INPUT_NULL)
336                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
337
338                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
339                 if (getppid() != 1)
340                         return STDOUT_FILENO;
341
342                 /* We need to open /dev/null here anew, to get the
343                  * right access mode. So we fall through */
344
345         case EXEC_OUTPUT_NULL:
346                 return open_null_as(O_WRONLY, STDOUT_FILENO);
347
348         case EXEC_OUTPUT_TTY:
349                 if (is_terminal_input(i))
350                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
351
352                 /* We don't reset the terminal if this is just about output */
353                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
354
355         case EXEC_OUTPUT_SYSLOG:
356         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
357         case EXEC_OUTPUT_KMSG:
358         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
359         case EXEC_OUTPUT_JOURNAL:
360         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
361                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
362
363         case EXEC_OUTPUT_SOCKET:
364                 assert(socket_fd >= 0);
365                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
366
367         default:
368                 assert_not_reached("Unknown output type");
369         }
370 }
371
372 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
373         ExecOutput o, e;
374         ExecInput i;
375
376         assert(context);
377         assert(ident);
378
379         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
380         o = fixup_output(context->std_output, socket_fd);
381         e = fixup_output(context->std_error, socket_fd);
382
383         /* This expects the input and output are already set up */
384
385         /* Don't change the stderr file descriptor if we inherit all
386          * the way and are not on a tty */
387         if (e == EXEC_OUTPUT_INHERIT &&
388             o == EXEC_OUTPUT_INHERIT &&
389             i == EXEC_INPUT_NULL &&
390             !is_terminal_input(context->std_input) &&
391             getppid () != 1)
392                 return STDERR_FILENO;
393
394         /* Duplicate from stdout if possible */
395         if (e == o || e == EXEC_OUTPUT_INHERIT)
396                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
397
398         switch (e) {
399
400         case EXEC_OUTPUT_NULL:
401                 return open_null_as(O_WRONLY, STDERR_FILENO);
402
403         case EXEC_OUTPUT_TTY:
404                 if (is_terminal_input(i))
405                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
406
407                 /* We don't reset the terminal if this is just about output */
408                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
409
410         case EXEC_OUTPUT_SYSLOG:
411         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
412         case EXEC_OUTPUT_KMSG:
413         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
414         case EXEC_OUTPUT_JOURNAL:
415         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
416                 return connect_logger_as(context, e, ident, STDERR_FILENO);
417
418         case EXEC_OUTPUT_SOCKET:
419                 assert(socket_fd >= 0);
420                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
421
422         default:
423                 assert_not_reached("Unknown error type");
424         }
425 }
426
427 static int chown_terminal(int fd, uid_t uid) {
428         struct stat st;
429
430         assert(fd >= 0);
431
432         /* This might fail. What matters are the results. */
433         (void) fchown(fd, uid, -1);
434         (void) fchmod(fd, TTY_MODE);
435
436         if (fstat(fd, &st) < 0)
437                 return -errno;
438
439         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
440                 return -EPERM;
441
442         return 0;
443 }
444
445 static int setup_confirm_stdio(const ExecContext *context,
446                                int *_saved_stdin,
447                                int *_saved_stdout) {
448         int fd = -1, saved_stdin, saved_stdout = -1, r;
449
450         assert(context);
451         assert(_saved_stdin);
452         assert(_saved_stdout);
453
454         /* This returns positive EXIT_xxx return values instead of
455          * negative errno style values! */
456
457         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
458                 return EXIT_STDIN;
459
460         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
461                 r = EXIT_STDOUT;
462                 goto fail;
463         }
464
465         if ((fd = acquire_terminal(
466                              tty_path(context),
467                              context->std_input == EXEC_INPUT_TTY_FAIL,
468                              context->std_input == EXEC_INPUT_TTY_FORCE,
469                              false)) < 0) {
470                 r = EXIT_STDIN;
471                 goto fail;
472         }
473
474         if (chown_terminal(fd, getuid()) < 0) {
475                 r = EXIT_STDIN;
476                 goto fail;
477         }
478
479         if (dup2(fd, STDIN_FILENO) < 0) {
480                 r = EXIT_STDIN;
481                 goto fail;
482         }
483
484         if (dup2(fd, STDOUT_FILENO) < 0) {
485                 r = EXIT_STDOUT;
486                 goto fail;
487         }
488
489         if (fd >= 2)
490                 close_nointr_nofail(fd);
491
492         *_saved_stdin = saved_stdin;
493         *_saved_stdout = saved_stdout;
494
495         return 0;
496
497 fail:
498         if (saved_stdout >= 0)
499                 close_nointr_nofail(saved_stdout);
500
501         if (saved_stdin >= 0)
502                 close_nointr_nofail(saved_stdin);
503
504         if (fd >= 0)
505                 close_nointr_nofail(fd);
506
507         return r;
508 }
509
510 static int restore_confirm_stdio(const ExecContext *context,
511                                  int *saved_stdin,
512                                  int *saved_stdout,
513                                  bool *keep_stdin,
514                                  bool *keep_stdout) {
515
516         assert(context);
517         assert(saved_stdin);
518         assert(*saved_stdin >= 0);
519         assert(saved_stdout);
520         assert(*saved_stdout >= 0);
521
522         /* This returns positive EXIT_xxx return values instead of
523          * negative errno style values! */
524
525         if (is_terminal_input(context->std_input)) {
526
527                 /* The service wants terminal input. */
528
529                 *keep_stdin = true;
530                 *keep_stdout =
531                         context->std_output == EXEC_OUTPUT_INHERIT ||
532                         context->std_output == EXEC_OUTPUT_TTY;
533
534         } else {
535                 /* If the service doesn't want a controlling terminal,
536                  * then we need to get rid entirely of what we have
537                  * already. */
538
539                 if (release_terminal() < 0)
540                         return EXIT_STDIN;
541
542                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
543                         return EXIT_STDIN;
544
545                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
546                         return EXIT_STDOUT;
547
548                 *keep_stdout = *keep_stdin = false;
549         }
550
551         return 0;
552 }
553
554 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
555         bool keep_groups = false;
556         int r;
557
558         assert(context);
559
560         /* Lookup and set GID and supplementary group list. Here too
561          * we avoid NSS lookups for gid=0. */
562
563         if (context->group || username) {
564
565                 if (context->group) {
566                         const char *g = context->group;
567
568                         if ((r = get_group_creds(&g, &gid)) < 0)
569                                 return r;
570                 }
571
572                 /* First step, initialize groups from /etc/groups */
573                 if (username && gid != 0) {
574                         if (initgroups(username, gid) < 0)
575                                 return -errno;
576
577                         keep_groups = true;
578                 }
579
580                 /* Second step, set our gids */
581                 if (setresgid(gid, gid, gid) < 0)
582                         return -errno;
583         }
584
585         if (context->supplementary_groups) {
586                 int ngroups_max, k;
587                 gid_t *gids;
588                 char **i;
589
590                 /* Final step, initialize any manually set supplementary groups */
591                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
592
593                 if (!(gids = new(gid_t, ngroups_max)))
594                         return -ENOMEM;
595
596                 if (keep_groups) {
597                         if ((k = getgroups(ngroups_max, gids)) < 0) {
598                                 free(gids);
599                                 return -errno;
600                         }
601                 } else
602                         k = 0;
603
604                 STRV_FOREACH(i, context->supplementary_groups) {
605                         const char *g;
606
607                         if (k >= ngroups_max) {
608                                 free(gids);
609                                 return -E2BIG;
610                         }
611
612                         g = *i;
613                         r = get_group_creds(&g, gids+k);
614                         if (r < 0) {
615                                 free(gids);
616                                 return r;
617                         }
618
619                         k++;
620                 }
621
622                 if (setgroups(k, gids) < 0) {
623                         free(gids);
624                         return -errno;
625                 }
626
627                 free(gids);
628         }
629
630         return 0;
631 }
632
633 static int enforce_user(const ExecContext *context, uid_t uid) {
634         int r;
635         assert(context);
636
637         /* Sets (but doesn't lookup) the uid and make sure we keep the
638          * capabilities while doing so. */
639
640         if (context->capabilities) {
641                 cap_t d;
642                 static const cap_value_t bits[] = {
643                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
644                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
645                 };
646
647                 /* First step: If we need to keep capabilities but
648                  * drop privileges we need to make sure we keep our
649                  * caps, whiel we drop privileges. */
650                 if (uid != 0) {
651                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
652
653                         if (prctl(PR_GET_SECUREBITS) != sb)
654                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
655                                         return -errno;
656                 }
657
658                 /* Second step: set the capabilities. This will reduce
659                  * the capabilities to the minimum we need. */
660
661                 if (!(d = cap_dup(context->capabilities)))
662                         return -errno;
663
664                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
665                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
666                         r = -errno;
667                         cap_free(d);
668                         return r;
669                 }
670
671                 if (cap_set_proc(d) < 0) {
672                         r = -errno;
673                         cap_free(d);
674                         return r;
675                 }
676
677                 cap_free(d);
678         }
679
680         /* Third step: actually set the uids */
681         if (setresuid(uid, uid, uid) < 0)
682                 return -errno;
683
684         /* At this point we should have all necessary capabilities but
685            are otherwise a normal user. However, the caps might got
686            corrupted due to the setresuid() so we need clean them up
687            later. This is done outside of this call. */
688
689         return 0;
690 }
691
692 #ifdef HAVE_PAM
693
694 static int null_conv(
695                 int num_msg,
696                 const struct pam_message **msg,
697                 struct pam_response **resp,
698                 void *appdata_ptr) {
699
700         /* We don't support conversations */
701
702         return PAM_CONV_ERR;
703 }
704
705 static int setup_pam(
706                 const char *name,
707                 const char *user,
708                 uid_t uid,
709                 const char *tty,
710                 char ***pam_env,
711                 int fds[], unsigned n_fds) {
712
713         static const struct pam_conv conv = {
714                 .conv = null_conv,
715                 .appdata_ptr = NULL
716         };
717
718         pam_handle_t *handle = NULL;
719         sigset_t ss, old_ss;
720         int pam_code = PAM_SUCCESS;
721         int err;
722         char **e = NULL;
723         bool close_session = false;
724         pid_t pam_pid = 0, parent_pid;
725
726         assert(name);
727         assert(user);
728         assert(pam_env);
729
730         /* We set up PAM in the parent process, then fork. The child
731          * will then stay around until killed via PR_GET_PDEATHSIG or
732          * systemd via the cgroup logic. It will then remove the PAM
733          * session again. The parent process will exec() the actual
734          * daemon. We do things this way to ensure that the main PID
735          * of the daemon is the one we initially fork()ed. */
736
737         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
738                 handle = NULL;
739                 goto fail;
740         }
741
742         if (tty)
743                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
744                         goto fail;
745
746         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
747                 goto fail;
748
749         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
750                 goto fail;
751
752         close_session = true;
753
754         if ((!(e = pam_getenvlist(handle)))) {
755                 pam_code = PAM_BUF_ERR;
756                 goto fail;
757         }
758
759         /* Block SIGTERM, so that we know that it won't get lost in
760          * the child */
761         if (sigemptyset(&ss) < 0 ||
762             sigaddset(&ss, SIGTERM) < 0 ||
763             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
764                 goto fail;
765
766         parent_pid = getpid();
767
768         if ((pam_pid = fork()) < 0)
769                 goto fail;
770
771         if (pam_pid == 0) {
772                 int sig;
773                 int r = EXIT_PAM;
774
775                 /* The child's job is to reset the PAM session on
776                  * termination */
777
778                 /* This string must fit in 10 chars (i.e. the length
779                  * of "/sbin/init"), to look pretty in /bin/ps */
780                 rename_process("(sd-pam)");
781
782                 /* Make sure we don't keep open the passed fds in this
783                 child. We assume that otherwise only those fds are
784                 open here that have been opened by PAM. */
785                 close_many(fds, n_fds);
786
787                 /* Drop privileges - we don't need any to pam_close_session
788                  * and this will make PR_SET_PDEATHSIG work in most cases.
789                  * If this fails, ignore the error - but expect sd-pam threads
790                  * to fail to exit normally */
791                 if (setresuid(uid, uid, uid) < 0)
792                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
793
794                 /* Wait until our parent died. This will only work if
795                  * the above setresuid() succeeds, otherwise the kernel
796                  * will not allow unprivileged parents kill their privileged
797                  * children this way. We rely on the control groups kill logic
798                  * to do the rest for us. */
799                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
800                         goto child_finish;
801
802                 /* Check if our parent process might already have
803                  * died? */
804                 if (getppid() == parent_pid) {
805                         for (;;) {
806                                 if (sigwait(&ss, &sig) < 0) {
807                                         if (errno == EINTR)
808                                                 continue;
809
810                                         goto child_finish;
811                                 }
812
813                                 assert(sig == SIGTERM);
814                                 break;
815                         }
816                 }
817
818                 /* If our parent died we'll end the session */
819                 if (getppid() != parent_pid)
820                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
821                                 goto child_finish;
822
823                 r = 0;
824
825         child_finish:
826                 pam_end(handle, pam_code | PAM_DATA_SILENT);
827                 _exit(r);
828         }
829
830         /* If the child was forked off successfully it will do all the
831          * cleanups, so forget about the handle here. */
832         handle = NULL;
833
834         /* Unblock SIGTERM again in the parent */
835         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
836                 goto fail;
837
838         /* We close the log explicitly here, since the PAM modules
839          * might have opened it, but we don't want this fd around. */
840         closelog();
841
842         *pam_env = e;
843         e = NULL;
844
845         return 0;
846
847 fail:
848         if (pam_code != PAM_SUCCESS)
849                 err = -EPERM;  /* PAM errors do not map to errno */
850         else
851                 err = -errno;
852
853         if (handle) {
854                 if (close_session)
855                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
856
857                 pam_end(handle, pam_code | PAM_DATA_SILENT);
858         }
859
860         strv_free(e);
861
862         closelog();
863
864         if (pam_pid > 1) {
865                 kill(pam_pid, SIGTERM);
866                 kill(pam_pid, SIGCONT);
867         }
868
869         return err;
870 }
871 #endif
872
873 static int do_capability_bounding_set_drop(uint64_t drop) {
874         unsigned long i;
875         cap_t old_cap = NULL, new_cap = NULL;
876         cap_flag_value_t fv;
877         int r;
878
879         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
880          * in the effective set (yes, the kernel drops that when
881          * executing init!), so get it back temporarily so that we can
882          * call PR_CAPBSET_DROP. */
883
884         old_cap = cap_get_proc();
885         if (!old_cap)
886                 return -errno;
887
888         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
889                 r = -errno;
890                 goto finish;
891         }
892
893         if (fv != CAP_SET) {
894                 static const cap_value_t v = CAP_SETPCAP;
895
896                 new_cap = cap_dup(old_cap);
897                 if (!new_cap) {
898                         r = -errno;
899                         goto finish;
900                 }
901
902                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
903                         r = -errno;
904                         goto finish;
905                 }
906
907                 if (cap_set_proc(new_cap) < 0) {
908                         r = -errno;
909                         goto finish;
910                 }
911         }
912
913         for (i = 0; i <= cap_last_cap(); i++)
914                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
915                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
916                                 r = -errno;
917                                 goto finish;
918                         }
919                 }
920
921         r = 0;
922
923 finish:
924         if (new_cap)
925                 cap_free(new_cap);
926
927         if (old_cap) {
928                 cap_set_proc(old_cap);
929                 cap_free(old_cap);
930         }
931
932         return r;
933 }
934
/* Renames the current process after the file name component of
 * 'path', wrapped in parentheses. If that name is longer than 8
 * characters only its last 8 are used; if it is empty the process is
 * renamed to "(...)". */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL: the visible part is
         * at most 10 characters, i.e. as long as "/sbin/init", so
         * that it looks pretty in /bin/ps */
        char buf[11];
        const char *name, *tail;
        size_t len, n;

        name = path_get_file_name(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        /* Keep the end of the name rather than the start: the
         * beginning is often just a common prefix such as
         * "systemd-" */
        len = strlen(name);
        n = len > 8 ? 8 : len;
        tail = name + len - n;

        buf[0] = '(';
        memcpy(buf + 1, tail, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
965
/*
 * Forks off a child process and, in the child, applies the settings
 * of 'context' step by step before calling execve() on command->path.
 *
 *   command            command to run; its exec_status is updated in
 *                      the parent once the child has been forked
 *   argv               argument vector; command->argv is used if NULL
 *   context            execution settings applied in the child
 *   fds, n_fds         file descriptors to keep open in the child. If
 *                      stdin, stdout or stderr is configured as a
 *                      socket, exactly one fd must be passed, which
 *                      is then used as that socket (-EINVAL otherwise)
 *   environment        merged into the final environment together
 *                      with the variables generated here,
 *                      context->environment, the loaded environment
 *                      files and the PAM environment
 *   apply_permissions  if true, groups, resource limits, the
 *                      capability bounding set, the user, secure
 *                      bits and capabilities are enforced
 *   apply_chroot       if true, chroot() into context->root_directory
 *                      (when set) and chdir() into the working
 *                      directory; otherwise only chdir() into the
 *                      concatenation of both paths
 *   apply_tty_stdin    passed on to the stdio setup helpers; if false
 *                      and stdin is a terminal, the confirmation
 *                      question is skipped
 *   confirm_spawn      ask for confirmation before executing
 *   cgroup_bondings,
 *   cgroup_attributes,
 *   cgroup_suffix      cgroups are realized and attributes applied in
 *                      the parent before forking; the new process is
 *                      installed into the bondings from both sides
 *   idle_pipe          if non-NULL, the child closes idle_pipe[1] and
 *                      waits up to IDLE_TIMEOUT_USEC for POLLHUP on
 *                      idle_pipe[0] before continuing
 *   ret                receives the PID of the child on success
 *
 * In the parent, returns 0 on success and a negative error code if
 * anything fails up to and including fork(). The child never returns
 * from this function: it either executes the command or terminates
 * via _exit(r), where r identifies the step that failed.
 */
966 int exec_spawn(ExecCommand *command,
967                char **argv,
968                const ExecContext *context,
969                int fds[], unsigned n_fds,
970                char **environment,
971                bool apply_permissions,
972                bool apply_chroot,
973                bool apply_tty_stdin,
974                bool confirm_spawn,
975                CGroupBonding *cgroup_bondings,
976                CGroupAttribute *cgroup_attributes,
977                const char *cgroup_suffix,
978                int idle_pipe[2],
979                pid_t *ret) {
980
981         pid_t pid;
982         int r;
983         char *line;
984         int socket_fd;
985         char **files_env = NULL;
986
987         assert(command);
988         assert(context);
989         assert(ret);
990         assert(fds || n_fds <= 0);
991
            /* If any of stdin/stdout/stderr shall be connected to a
             * socket, the single passed fd becomes socket_fd and the
             * fds array is treated as empty from here on. */
992         if (context->std_input == EXEC_INPUT_SOCKET ||
993             context->std_output == EXEC_OUTPUT_SOCKET ||
994             context->std_error == EXEC_OUTPUT_SOCKET) {
995
996                 if (n_fds != 1)
997                         return -EINVAL;
998
999                 socket_fd = fds[0];
1000
1001                 fds = NULL;
1002                 n_fds = 0;
1003         } else
1004                 socket_fd = -1;
1005
1006         if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1007                 log_error("Failed to load environment files: %s", strerror(-r));
1008                 return r;
1009         }
1010
1011         if (!argv)
1012                 argv = command->argv;
1013
1014         if (!(line = exec_command_line(argv))) {
1015                 r = -ENOMEM;
1016                 goto fail_parent;
1017         }
1018
1019         log_debug("About to execute: %s", line);
1020         free(line);
1021
             /* The cgroups are set up in the parent, before forking */
1022         r = cgroup_bonding_realize_list(cgroup_bondings);
1023         if (r < 0)
1024                 goto fail_parent;
1025
1026         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1027
1028         if ((pid = fork()) < 0) {
1029                 r = -errno;
1030                 goto fail_parent;
1031         }
1032
1033         if (pid == 0) {
                     /* In the child, 'err' carries the negative errno
                      * style reason of a failure and 'r' the exit code
                      * naming the failed step; both are consumed at
                      * fail_child. */
1034                 int i, err;
1035                 sigset_t ss;
1036                 const char *username = NULL, *home = NULL;
1037                 uid_t uid = (uid_t) -1;
1038                 gid_t gid = (gid_t) -1;
1039                 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1040                 unsigned n_env = 0;
1041                 int saved_stdout = -1, saved_stdin = -1;
1042                 bool keep_stdout = false, keep_stdin = false, set_access = false;
1043
1044                 /* child */
1045
1046                 rename_process_from_path(command->path);
1047
1048                 /* We reset exactly these signals, since they are the
1049                  * only ones we set to SIG_IGN in the main daemon. All
1050                  * others we leave untouched because we set them to
1051                  * SIG_DFL or a valid handler initially, both of which
1052                  * will be demoted to SIG_DFL. */
1053                 default_signals(SIGNALS_CRASH_HANDLER,
1054                                 SIGNALS_IGNORE, -1);
1055
1056                 if (context->ignore_sigpipe)
1057                         ignore_signals(SIGPIPE, -1);
1058
1059                 assert_se(sigemptyset(&ss) == 0);
1060                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1061                         err = -errno;
1062                         r = EXIT_SIGNAL_MASK;
1063                         goto fail_child;
1064                 }
1065
1066                 if (idle_pipe) {
1067                         if (idle_pipe[1] >= 0)
1068                                 close_nointr_nofail(idle_pipe[1]);
1069                         if (idle_pipe[0] >= 0) {
1070                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1071                                 close_nointr_nofail(idle_pipe[0]);
1072                         }
1073                 }
1074
1075                 /* Close sockets very early to make sure we don't
1076                  * block init reexecution because it cannot bind its
1077                  * sockets */
1078                 log_forget_fds();
1079                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1080                                            socket_fd >= 0 ? 1 : n_fds);
1081                 if (err < 0) {
1082                         r = EXIT_FDS;
1083                         goto fail_child;
1084                 }
1085
1086                 if (!context->same_pgrp)
1087                         if (setsid() < 0) {
1088                                 err = -errno;
1089                                 r = EXIT_SETSID;
1090                                 goto fail_child;
1091                         }
1092
1093                 if (context->tcpwrap_name) {
1094                         if (socket_fd >= 0)
1095                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1096                                         err = -EACCES;
1097                                         r = EXIT_TCPWRAP;
1098                                         goto fail_child;
1099                                 }
1100
1101                         for (i = 0; i < (int) n_fds; i++) {
1102                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1103                                         err = -EACCES;
1104                                         r = EXIT_TCPWRAP;
1105                                         goto fail_child;
1106                                 }
1107                         }
1108                 }
1109
1110                 exec_context_tty_reset(context);
1111
1112                 /* We skip the confirmation step if we shall not apply the TTY */
1113                 if (confirm_spawn &&
1114                     (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1115                         char response;
1116
1117                         /* Set up terminal for the question */
                             /* A non-zero return value is used directly
                              * as the exit code of the child */
1118                         if ((r = setup_confirm_stdio(context,
1119                                                      &saved_stdin, &saved_stdout))) {
1120                                 err = -errno;
1121                                 goto fail_child;
1122                         }
1123
1124                         /* Now ask the question. */
1125                         if (!(line = exec_command_line(argv))) {
1126                                 err = -ENOMEM;
1127                                 r = EXIT_MEMORY;
1128                                 goto fail_child;
1129                         }
1130
1131                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1132                         free(line);
1133
1134                         if (r < 0 || response == 'n') {
1135                                 err = -ECANCELED;
1136                                 r = EXIT_CONFIRM;
1137                                 goto fail_child;
1138                         } else if (response == 's') {
                                     /* Skipping is not a failure: leave
                                      * with exit code 0 */
1139                                 err = r = 0;
1140                                 goto fail_child;
1141                         }
1142
1143                         /* Release terminal for the question */
1144                         if ((r = restore_confirm_stdio(context,
1145                                                        &saved_stdin, &saved_stdout,
1146                                                        &keep_stdin, &keep_stdout))) {
1147                                 err = -errno;
1148                                 goto fail_child;
1149                         }
1150                 }
1151
1152                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1153                  * must be sure to drop O_NONBLOCK */
1154                 if (socket_fd >= 0)
1155                         fd_nonblock(socket_fd, false);
1156
1157                 if (!keep_stdin) {
1158                         err = setup_input(context, socket_fd, apply_tty_stdin);
1159                         if (err < 0) {
1160                                 r = EXIT_STDIN;
1161                                 goto fail_child;
1162                         }
1163                 }
1164
1165                 if (!keep_stdout) {
1166                         err = setup_output(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1167                         if (err < 0) {
1168                                 r = EXIT_STDOUT;
1169                                 goto fail_child;
1170                         }
1171                 }
1172
1173                 err = setup_error(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1174                 if (err < 0) {
1175                         r = EXIT_STDERR;
1176                         goto fail_child;
1177                 }
1178
1179                 if (cgroup_bondings) {
1180                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1181                         if (err < 0) {
1182                                 r = EXIT_CGROUP;
1183                                 goto fail_child;
1184                         }
1185                 }
1186
1187                 if (context->oom_score_adjust_set) {
1188                         char t[16];
1189
1190                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1191                         char_array_0(t);
1192
1193                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1194                                 /* Compatibility with Linux <= 2.6.35 */
1195
1196                                 int adj;
1197
1198                                 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1199                                 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1200
1201                                 snprintf(t, sizeof(t), "%i", adj);
1202                                 char_array_0(t);
1203
1204                                 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1205                                     && errno != EACCES) {
1206                                         err = -errno;
1207                                         r = EXIT_OOM_ADJUST;
1208                                         goto fail_child;
1209                                 }
1210                         }
1211                 }
1212
1213                 if (context->nice_set)
1214                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1215                                 err = -errno;
1216                                 r = EXIT_NICE;
1217                                 goto fail_child;
1218                         }
1219
1220                 if (context->cpu_sched_set) {
1221                         struct sched_param param;
1222
1223                         zero(param);
1224                         param.sched_priority = context->cpu_sched_priority;
1225
1226                         if (sched_setscheduler(0, context->cpu_sched_policy |
1227                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1228                                 err = -errno;
1229                                 r = EXIT_SETSCHEDULER;
1230                                 goto fail_child;
1231                         }
1232                 }
1233
1234                 if (context->cpuset)
1235                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1236                                 err = -errno;
1237                                 r = EXIT_CPUAFFINITY;
1238                                 goto fail_child;
1239                         }
1240
1241                 if (context->ioprio_set)
1242                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1243                                 err = -errno;
1244                                 r = EXIT_IOPRIO;
1245                                 goto fail_child;
1246                         }
1247
1248                 if (context->timer_slack_nsec_set)
1249                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1250                                 err = -errno;
1251                                 r = EXIT_TIMERSLACK;
1252                                 goto fail_child;
1253                         }
1254
1255                 if (context->utmp_id)
1256                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1257
1258                 if (context->user) {
1259                         username = context->user;
1260                         err = get_user_creds(&username, &uid, &gid, &home);
1261                         if (err < 0) {
1262                                 r = EXIT_USER;
1263                                 goto fail_child;
1264                         }
1265
1266                         if (is_terminal_input(context->std_input)) {
1267                                 err = chown_terminal(STDIN_FILENO, uid);
1268                                 if (err < 0) {
1269                                         r = EXIT_STDIN;
1270                                         goto fail_child;
1271                                 }
1272                         }
1273
1274                         if (cgroup_bondings && context->control_group_modify) {
1275                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1276                                 if (err >= 0)
1277                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1278                                 if (err < 0) {
1279                                         r = EXIT_CGROUP;
1280                                         goto fail_child;
1281                                 }
1282
1283                                 set_access = true;
1284                         }
1285                 }
1286
                     /* Task access was not configured above, but the
                      * persistency setting was given explicitly (>= 0):
                      * apply only that, leaving mode and ownership
                      * arguments at -1 */
1287                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1288                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1289                         if (err < 0) {
1290                                 r = EXIT_CGROUP;
1291                                 goto fail_child;
1292                         }
1293                 }
1294
1295                 if (apply_permissions) {
1296                         err = enforce_groups(context, username, gid);
1297                         if (err < 0) {
1298                                 r = EXIT_GROUP;
1299                                 goto fail_child;
1300                         }
1301                 }
1302
1303                 umask(context->umask);
1304
1305 #ifdef HAVE_PAM
1306                 if (context->pam_name && username) {
1307                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1308                         if (err < 0) {
1309                                 r = EXIT_PAM;
1310                                 goto fail_child;
1311                         }
1312                 }
1313 #endif
1314                 if (context->private_network) {
1315                         if (unshare(CLONE_NEWNET) < 0) {
1316                                 err = -errno;
1317                                 r = EXIT_NETWORK;
1318                                 goto fail_child;
1319                         }
1320
1321                         loopback_setup();
1322                 }
1323
1324                 if (strv_length(context->read_write_dirs) > 0 ||
1325                     strv_length(context->read_only_dirs) > 0 ||
1326                     strv_length(context->inaccessible_dirs) > 0 ||
1327                     context->mount_flags != MS_SHARED ||
1328                     context->private_tmp) {
1329                         err = setup_namespace(context->read_write_dirs,
1330                                               context->read_only_dirs,
1331                                               context->inaccessible_dirs,
1332                                               context->private_tmp,
1333                                               context->mount_flags);
1334                         if (err < 0) {
1335                                 r = EXIT_NAMESPACE;
1336                                 goto fail_child;
1337                         }
1338                 }
1339
1340                 if (apply_chroot) {
1341                         if (context->root_directory)
1342                                 if (chroot(context->root_directory) < 0) {
1343                                         err = -errno;
1344                                         r = EXIT_CHROOT;
1345                                         goto fail_child;
1346                                 }
1347
1348                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1349                                 err = -errno;
1350                                 r = EXIT_CHDIR;
1351                                 goto fail_child;
1352                         }
1353                 } else {
1354
                             /* No chroot(): change into the working
                              * directory as seen below the configured
                              * root directory instead */
1355                         char *d;
1356
1357                         if (asprintf(&d, "%s/%s",
1358                                      context->root_directory ? context->root_directory : "",
1359                                      context->working_directory ? context->working_directory : "") < 0) {
1360                                 err = -ENOMEM;
1361                                 r = EXIT_MEMORY;
1362                                 goto fail_child;
1363                         }
1364
1365                         if (chdir(d) < 0) {
1366                                 err = -errno;
1367                                 free(d);
1368                                 r = EXIT_CHDIR;
1369                                 goto fail_child;
1370                         }
1371
1372                         free(d);
1373                 }
1374
1375                 /* We repeat the fd closing here, to make sure that
1376                  * nothing is leaked from the PAM modules */
1377                 err = close_all_fds(fds, n_fds);
1378                 if (err >= 0)
1379                         err = shift_fds(fds, n_fds);
1380                 if (err >= 0)
1381                         err = flags_fds(fds, n_fds, context->non_blocking);
1382                 if (err < 0) {
1383                         r = EXIT_FDS;
1384                         goto fail_child;
1385                 }
1386
1387                 if (apply_permissions) {
1388
1389                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1390                                 if (!context->rlimit[i])
1391                                         continue;
1392
1393                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1394                                         err = -errno;
1395                                         r = EXIT_LIMITS;
1396                                         goto fail_child;
1397                                 }
1398                         }
1399
1400                         if (context->capability_bounding_set_drop) {
1401                                 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1402                                 if (err < 0) {
1403                                         r = EXIT_CAPABILITIES;
1404                                         goto fail_child;
1405                                 }
1406                         }
1407
1408                         if (context->user) {
1409                                 err = enforce_user(context, uid);
1410                                 if (err < 0) {
1411                                         r = EXIT_USER;
1412                                         goto fail_child;
1413                                 }
1414                         }
1415
1416                         /* PR_GET_SECUREBITS is not privileged, while
1417                          * PR_SET_SECUREBITS is. So to suppress
1418                          * potential EPERMs we'll try not to call
1419                          * PR_SET_SECUREBITS unless necessary. */
1420                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1421                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1422                                         err = -errno;
1423                                         r = EXIT_SECUREBITS;
1424                                         goto fail_child;
1425                                 }
1426
1427                         if (context->capabilities)
1428                                 if (cap_set_proc(context->capabilities) < 0) {
1429                                         err = -errno;
1430                                         r = EXIT_CAPABILITIES;
1431                                         goto fail_child;
1432                                 }
1433                 }
1434
                     /* At most six entries are added below (LISTEN_PID,
                      * LISTEN_FDS, HOME, LOGNAME, USER and the terminal
                      * entry). The seventh slot is presumably left as
                      * the NULL terminator -- this assumes new0()
                      * zero-initializes the array. */
1435                 if (!(our_env = new0(char*, 7))) {
1436                         err = -ENOMEM;
1437                         r = EXIT_MEMORY;
1438                         goto fail_child;
1439                 }
1440
1441                 if (n_fds > 0)
1442                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1443                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1444                                 err = -ENOMEM;
1445                                 r = EXIT_MEMORY;
1446                                 goto fail_child;
1447                         }
1448
1449                 if (home)
1450                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1451                                 err = -ENOMEM;
1452                                 r = EXIT_MEMORY;
1453                                 goto fail_child;
1454                         }
1455
1456                 if (username)
1457                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1458                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1459                                 err = -ENOMEM;
1460                                 r = EXIT_MEMORY;
1461                                 goto fail_child;
1462                         }
1463
1464                 if (is_terminal_input(context->std_input) ||
1465                     context->std_output == EXEC_OUTPUT_TTY ||
1466                     context->std_error == EXEC_OUTPUT_TTY)
1467                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1468                                 err = -ENOMEM;
1469                                 r = EXIT_MEMORY;
1470                                 goto fail_child;
1471                         }
1472
1473                 assert(n_env <= 7);
1474
1475                 if (!(final_env = strv_env_merge(
1476                                       5,
1477                                       environment,
1478                                       our_env,
1479                                       context->environment,
1480                                       files_env,
1481                                       pam_env,
1482                                       NULL))) {
1483                         err = -ENOMEM;
1484                         r = EXIT_MEMORY;
1485                         goto fail_child;
1486                 }
1487
1488                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1489                         err = -ENOMEM;
1490                         r = EXIT_MEMORY;
1491                         goto fail_child;
1492                 }
1493
1494                 final_env = strv_env_clean(final_env);
1495
                     /* execve() only returns on failure */
1496                 execve(command->path, final_argv, final_env);
1497                 err = -errno;
1498                 r = EXIT_EXEC;
1499
                 /* Common exit path of the child. It is reached with
                  * r == 0 when the user chose to skip the command, in
                  * which case nothing is logged. The child always
                  * terminates in _exit(r) and never falls through to
                  * the parent code below. */
1500         fail_child:
1501                 if (r != 0) {
1502                         log_open();
1503                         log_warning("Failed at step %s spawning %s: %s",
1504                                     exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1505                                     command->path, strerror(-err));
1506                 }
1507
1508                 strv_free(our_env);
1509                 strv_free(final_env);
1510                 strv_free(pam_env);
1511                 strv_free(files_env);
1512                 strv_free(final_argv);
1513
1514                 if (saved_stdin >= 0)
1515                         close_nointr_nofail(saved_stdin);
1516
1517                 if (saved_stdout >= 0)
1518                         close_nointr_nofail(saved_stdout);
1519
1520                 _exit(r);
1521         }
1522
             /* parent */
1523         strv_free(files_env);
1524
1525         /* We add the new process to the cgroup both in the child (so
1526          * that we can be sure that no user code is ever executed
1527          * outside of the cgroup) and in the parent (so that we can be
1528          * sure that when we kill the cgroup the process will be
1529          * killed too). */
1530         if (cgroup_bondings)
1531                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1532
1533         log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1534
1535         exec_status_start(&command->exec_status, pid);
1536
1537         *ret = pid;
1538         return 0;
1539
     /* Failure in the parent, before a child exists */
1540 fail_parent:
1541         strv_free(files_env);
1542
1543         return r;
1544 }
1545
1546 void exec_context_init(ExecContext *c) {
1547         assert(c);
1548
1549         c->umask = 0022;
1550         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1551         c->cpu_sched_policy = SCHED_OTHER;
1552         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1553         c->syslog_level_prefix = true;
1554         c->mount_flags = MS_SHARED;
1555         c->kill_signal = SIGTERM;
1556         c->send_sigkill = true;
1557         c->control_group_persistent = -1;
1558         c->ignore_sigpipe = true;
1559 }
1560
1561 void exec_context_done(ExecContext *c) {
1562         unsigned l;
1563
1564         assert(c);
1565
1566         strv_free(c->environment);
1567         c->environment = NULL;
1568
1569         strv_free(c->environment_files);
1570         c->environment_files = NULL;
1571
1572         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1573                 free(c->rlimit[l]);
1574                 c->rlimit[l] = NULL;
1575         }
1576
1577         free(c->working_directory);
1578         c->working_directory = NULL;
1579         free(c->root_directory);
1580         c->root_directory = NULL;
1581
1582         free(c->tty_path);
1583         c->tty_path = NULL;
1584
1585         free(c->tcpwrap_name);
1586         c->tcpwrap_name = NULL;
1587
1588         free(c->syslog_identifier);
1589         c->syslog_identifier = NULL;
1590
1591         free(c->user);
1592         c->user = NULL;
1593
1594         free(c->group);
1595         c->group = NULL;
1596
1597         strv_free(c->supplementary_groups);
1598         c->supplementary_groups = NULL;
1599
1600         free(c->pam_name);
1601         c->pam_name = NULL;
1602
1603         if (c->capabilities) {
1604                 cap_free(c->capabilities);
1605                 c->capabilities = NULL;
1606         }
1607
1608         strv_free(c->read_only_dirs);
1609         c->read_only_dirs = NULL;
1610
1611         strv_free(c->read_write_dirs);
1612         c->read_write_dirs = NULL;
1613
1614         strv_free(c->inaccessible_dirs);
1615         c->inaccessible_dirs = NULL;
1616
1617         if (c->cpuset)
1618                 CPU_FREE(c->cpuset);
1619
1620         free(c->utmp_id);
1621         c->utmp_id = NULL;
1622 }
1623
1624 void exec_command_done(ExecCommand *c) {
1625         assert(c);
1626
1627         free(c->path);
1628         c->path = NULL;
1629
1630         strv_free(c->argv);
1631         c->argv = NULL;
1632 }
1633
1634 void exec_command_done_array(ExecCommand *c, unsigned n) {
1635         unsigned i;
1636
1637         for (i = 0; i < n; i++)
1638                 exec_command_done(c+i);
1639 }
1640
1641 void exec_command_free_list(ExecCommand *c) {
1642         ExecCommand *i;
1643
1644         while ((i = c)) {
1645                 LIST_REMOVE(ExecCommand, command, c, i);
1646                 exec_command_done(i);
1647                 free(i);
1648         }
1649 }
1650
1651 void exec_command_free_array(ExecCommand **c, unsigned n) {
1652         unsigned i;
1653
1654         for (i = 0; i < n; i++) {
1655                 exec_command_free_list(c[i]);
1656                 c[i] = NULL;
1657         }
1658 }
1659
1660 int exec_context_load_environment(const ExecContext *c, char ***l) {
1661         char **i, **r = NULL;
1662
1663         assert(c);
1664         assert(l);
1665
1666         STRV_FOREACH(i, c->environment_files) {
1667                 char *fn;
1668                 int k;
1669                 bool ignore = false;
1670                 char **p;
1671
1672                 fn = *i;
1673
1674                 if (fn[0] == '-') {
1675                         ignore = true;
1676                         fn ++;
1677                 }
1678
1679                 if (!path_is_absolute(fn)) {
1680
1681                         if (ignore)
1682                                 continue;
1683
1684                         strv_free(r);
1685                         return -EINVAL;
1686                 }
1687
1688                 if ((k = load_env_file(fn, &p)) < 0) {
1689
1690                         if (ignore)
1691                                 continue;
1692
1693                         strv_free(r);
1694                         return k;
1695                 }
1696
1697                 if (r == NULL)
1698                         r = p;
1699                 else {
1700                         char **m;
1701
1702                         m = strv_env_merge(2, r, p);
1703                         strv_free(r);
1704                         strv_free(p);
1705
1706                         if (!m)
1707                                 return -ENOMEM;
1708
1709                         r = m;
1710                 }
1711         }
1712
1713         *l = r;
1714
1715         return 0;
1716 }
1717
1718 static void strv_fprintf(FILE *f, char **l) {
1719         char **g;
1720
1721         assert(f);
1722
1723         STRV_FOREACH(g, l)
1724                 fprintf(f, " %s", *g);
1725 }
1726
1727 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1728         char ** e;
1729         unsigned i;
1730
1731         assert(c);
1732         assert(f);
1733
1734         if (!prefix)
1735                 prefix = "";
1736
1737         fprintf(f,
1738                 "%sUMask: %04o\n"
1739                 "%sWorkingDirectory: %s\n"
1740                 "%sRootDirectory: %s\n"
1741                 "%sNonBlocking: %s\n"
1742                 "%sPrivateTmp: %s\n"
1743                 "%sControlGroupModify: %s\n"
1744                 "%sControlGroupPersistent: %s\n"
1745                 "%sPrivateNetwork: %s\n",
1746                 prefix, c->umask,
1747                 prefix, c->working_directory ? c->working_directory : "/",
1748                 prefix, c->root_directory ? c->root_directory : "/",
1749                 prefix, yes_no(c->non_blocking),
1750                 prefix, yes_no(c->private_tmp),
1751                 prefix, yes_no(c->control_group_modify),
1752                 prefix, yes_no(c->control_group_persistent),
1753                 prefix, yes_no(c->private_network));
1754
1755         STRV_FOREACH(e, c->environment)
1756                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1757
1758         STRV_FOREACH(e, c->environment_files)
1759                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1760
1761         if (c->tcpwrap_name)
1762                 fprintf(f,
1763                         "%sTCPWrapName: %s\n",
1764                         prefix, c->tcpwrap_name);
1765
1766         if (c->nice_set)
1767                 fprintf(f,
1768                         "%sNice: %i\n",
1769                         prefix, c->nice);
1770
1771         if (c->oom_score_adjust_set)
1772                 fprintf(f,
1773                         "%sOOMScoreAdjust: %i\n",
1774                         prefix, c->oom_score_adjust);
1775
1776         for (i = 0; i < RLIM_NLIMITS; i++)
1777                 if (c->rlimit[i])
1778                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1779
1780         if (c->ioprio_set)
1781                 fprintf(f,
1782                         "%sIOSchedulingClass: %s\n"
1783                         "%sIOPriority: %i\n",
1784                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1785                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1786
1787         if (c->cpu_sched_set)
1788                 fprintf(f,
1789                         "%sCPUSchedulingPolicy: %s\n"
1790                         "%sCPUSchedulingPriority: %i\n"
1791                         "%sCPUSchedulingResetOnFork: %s\n",
1792                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1793                         prefix, c->cpu_sched_priority,
1794                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1795
1796         if (c->cpuset) {
1797                 fprintf(f, "%sCPUAffinity:", prefix);
1798                 for (i = 0; i < c->cpuset_ncpus; i++)
1799                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1800                                 fprintf(f, " %i", i);
1801                 fputs("\n", f);
1802         }
1803
1804         if (c->timer_slack_nsec_set)
1805                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1806
1807         fprintf(f,
1808                 "%sStandardInput: %s\n"
1809                 "%sStandardOutput: %s\n"
1810                 "%sStandardError: %s\n",
1811                 prefix, exec_input_to_string(c->std_input),
1812                 prefix, exec_output_to_string(c->std_output),
1813                 prefix, exec_output_to_string(c->std_error));
1814
1815         if (c->tty_path)
1816                 fprintf(f,
1817                         "%sTTYPath: %s\n"
1818                         "%sTTYReset: %s\n"
1819                         "%sTTYVHangup: %s\n"
1820                         "%sTTYVTDisallocate: %s\n",
1821                         prefix, c->tty_path,
1822                         prefix, yes_no(c->tty_reset),
1823                         prefix, yes_no(c->tty_vhangup),
1824                         prefix, yes_no(c->tty_vt_disallocate));
1825
1826         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1827             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1828             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1829             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1830                 fprintf(f,
1831                         "%sSyslogFacility: %s\n"
1832                         "%sSyslogLevel: %s\n",
1833                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1834                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1835
1836         if (c->capabilities) {
1837                 char *t;
1838                 if ((t = cap_to_text(c->capabilities, NULL))) {
1839                         fprintf(f, "%sCapabilities: %s\n",
1840                                 prefix, t);
1841                         cap_free(t);
1842                 }
1843         }
1844
1845         if (c->secure_bits)
1846                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1847                         prefix,
1848                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1849                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1850                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1851                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1852                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1853                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1854
1855         if (c->capability_bounding_set_drop) {
1856                 unsigned long l;
1857                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1858
1859                 for (l = 0; l <= cap_last_cap(); l++)
1860                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1861                                 char *t;
1862
1863                                 if ((t = cap_to_name(l))) {
1864                                         fprintf(f, " %s", t);
1865                                         cap_free(t);
1866                                 }
1867                         }
1868
1869                 fputs("\n", f);
1870         }
1871
1872         if (c->user)
1873                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1874         if (c->group)
1875                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1876
1877         if (strv_length(c->supplementary_groups) > 0) {
1878                 fprintf(f, "%sSupplementaryGroups:", prefix);
1879                 strv_fprintf(f, c->supplementary_groups);
1880                 fputs("\n", f);
1881         }
1882
1883         if (c->pam_name)
1884                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1885
1886         if (strv_length(c->read_write_dirs) > 0) {
1887                 fprintf(f, "%sReadWriteDirs:", prefix);
1888                 strv_fprintf(f, c->read_write_dirs);
1889                 fputs("\n", f);
1890         }
1891
1892         if (strv_length(c->read_only_dirs) > 0) {
1893                 fprintf(f, "%sReadOnlyDirs:", prefix);
1894                 strv_fprintf(f, c->read_only_dirs);
1895                 fputs("\n", f);
1896         }
1897
1898         if (strv_length(c->inaccessible_dirs) > 0) {
1899                 fprintf(f, "%sInaccessibleDirs:", prefix);
1900                 strv_fprintf(f, c->inaccessible_dirs);
1901                 fputs("\n", f);
1902         }
1903
1904         fprintf(f,
1905                 "%sKillMode: %s\n"
1906                 "%sKillSignal: SIG%s\n"
1907                 "%sSendSIGKILL: %s\n"
1908                 "%sIgnoreSIGPIPE: %s\n",
1909                 prefix, kill_mode_to_string(c->kill_mode),
1910                 prefix, signal_to_string(c->kill_signal),
1911                 prefix, yes_no(c->send_sigkill),
1912                 prefix, yes_no(c->ignore_sigpipe));
1913
1914         if (c->utmp_id)
1915                 fprintf(f,
1916                         "%sUtmpIdentifier: %s\n",
1917                         prefix, c->utmp_id);
1918 }
1919
1920 void exec_status_start(ExecStatus *s, pid_t pid) {
1921         assert(s);
1922
1923         zero(*s);
1924         s->pid = pid;
1925         dual_timestamp_get(&s->start_timestamp);
1926 }
1927
1928 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1929         assert(s);
1930
1931         if (s->pid && s->pid != pid)
1932                 zero(*s);
1933
1934         s->pid = pid;
1935         dual_timestamp_get(&s->exit_timestamp);
1936
1937         s->code = code;
1938         s->status = status;
1939
1940         if (context) {
1941                 if (context->utmp_id)
1942                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1943
1944                 exec_context_tty_reset(context);
1945         }
1946 }
1947
1948 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1949         char buf[FORMAT_TIMESTAMP_MAX];
1950
1951         assert(s);
1952         assert(f);
1953
1954         if (!prefix)
1955                 prefix = "";
1956
1957         if (s->pid <= 0)
1958                 return;
1959
1960         fprintf(f,
1961                 "%sPID: %lu\n",
1962                 prefix, (unsigned long) s->pid);
1963
1964         if (s->start_timestamp.realtime > 0)
1965                 fprintf(f,
1966                         "%sStart Timestamp: %s\n",
1967                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1968
1969         if (s->exit_timestamp.realtime > 0)
1970                 fprintf(f,
1971                         "%sExit Timestamp: %s\n"
1972                         "%sExit Code: %s\n"
1973                         "%sExit Status: %i\n",
1974                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1975                         prefix, sigchld_code_to_string(s->code),
1976                         prefix, s->status);
1977 }
1978
1979 char *exec_command_line(char **argv) {
1980         size_t k;
1981         char *n, *p, **a;
1982         bool first = true;
1983
1984         assert(argv);
1985
1986         k = 1;
1987         STRV_FOREACH(a, argv)
1988                 k += strlen(*a)+3;
1989
1990         if (!(n = new(char, k)))
1991                 return NULL;
1992
1993         p = n;
1994         STRV_FOREACH(a, argv) {
1995
1996                 if (!first)
1997                         *(p++) = ' ';
1998                 else
1999                         first = false;
2000
2001                 if (strpbrk(*a, WHITESPACE)) {
2002                         *(p++) = '\'';
2003                         p = stpcpy(p, *a);
2004                         *(p++) = '\'';
2005                 } else
2006                         p = stpcpy(p, *a);
2007
2008         }
2009
2010         *p = 0;
2011
2012         /* FIXME: this doesn't really handle arguments that have
2013          * spaces and ticks in them */
2014
2015         return n;
2016 }
2017
2018 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2019         char *p2;
2020         const char *prefix2;
2021
2022         char *cmd;
2023
2024         assert(c);
2025         assert(f);
2026
2027         if (!prefix)
2028                 prefix = "";
2029         p2 = strappend(prefix, "\t");
2030         prefix2 = p2 ? p2 : prefix;
2031
2032         cmd = exec_command_line(c->argv);
2033
2034         fprintf(f,
2035                 "%sCommand Line: %s\n",
2036                 prefix, cmd ? cmd : strerror(ENOMEM));
2037
2038         free(cmd);
2039
2040         exec_status_dump(&c->exec_status, f, prefix2);
2041
2042         free(p2);
2043 }
2044
2045 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2046         assert(f);
2047
2048         if (!prefix)
2049                 prefix = "";
2050
2051         LIST_FOREACH(command, c, c)
2052                 exec_command_dump(c, f, prefix);
2053 }
2054
2055 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2056         ExecCommand *end;
2057
2058         assert(l);
2059         assert(e);
2060
2061         if (*l) {
2062                 /* It's kind of important, that we keep the order here */
2063                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2064                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2065         } else
2066               *l = e;
2067 }
2068
2069 int exec_command_set(ExecCommand *c, const char *path, ...) {
2070         va_list ap;
2071         char **l, *p;
2072
2073         assert(c);
2074         assert(path);
2075
2076         va_start(ap, path);
2077         l = strv_new_ap(path, ap);
2078         va_end(ap);
2079
2080         if (!l)
2081                 return -ENOMEM;
2082
2083         if (!(p = strdup(path))) {
2084                 strv_free(l);
2085                 return -ENOMEM;
2086         }
2087
2088         free(c->path);
2089         c->path = p;
2090
2091         strv_free(c->argv);
2092         c->argv = l;
2093
2094         return 0;
2095 }
2096
2097 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2098         [EXEC_INPUT_NULL] = "null",
2099         [EXEC_INPUT_TTY] = "tty",
2100         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2101         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2102         [EXEC_INPUT_SOCKET] = "socket"
2103 };
2104
2105 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2106
2107 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2108         [EXEC_OUTPUT_INHERIT] = "inherit",
2109         [EXEC_OUTPUT_NULL] = "null",
2110         [EXEC_OUTPUT_TTY] = "tty",
2111         [EXEC_OUTPUT_SYSLOG] = "syslog",
2112         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2113         [EXEC_OUTPUT_KMSG] = "kmsg",
2114         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2115         [EXEC_OUTPUT_JOURNAL] = "journal",
2116         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2117         [EXEC_OUTPUT_SOCKET] = "socket"
2118 };
2119
2120 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2121
2122 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2123         [KILL_CONTROL_GROUP] = "control-group",
2124         [KILL_PROCESS] = "process",
2125         [KILL_NONE] = "none"
2126 };
2127
2128 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2129
2130 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2131         [KILL_MAIN] = "main",
2132         [KILL_CONTROL] = "control",
2133         [KILL_ALL] = "all"
2134 };
2135
2136 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);