chiark / gitweb /
unit: signal explicitly if a condition failed in unit_start()
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40
41 #ifdef HAVE_PAM
42 #include <security/pam_appl.h>
43 #endif
44
45 #include "execute.h"
46 #include "strv.h"
47 #include "macro.h"
48 #include "capability.h"
49 #include "util.h"
50 #include "log.h"
51 #include "ioprio.h"
52 #include "securebits.h"
53 #include "cgroup.h"
54 #include "namespace.h"
55 #include "tcpwrap.h"
56 #include "exit-status.h"
57 #include "missing.h"
58 #include "utmp-wtmp.h"
59 #include "def.h"
60 #include "loopback-setup.h"
61
62 /* This assumes there is a 'tty' group */
63 #define TTY_MODE 0620
64
/* Relocates the passed descriptors so that fds[k] ends up as descriptor
 * number k+3, for every k in [0, n_fds). The array is updated in place as
 * descriptors get duplicated and the old ones closed.
 *
 * Returns 0 on success or a negative errno value if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int want = idx + 3;
                        int moved;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, want);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = moved;

                        /* F_DUPFD hands out the lowest free descriptor
                         * >= want. If the slot was still occupied we got
                         * something else; remember the first such index
                         * and run another pass starting there. */
                        if (moved != want && retry < 0)
                                retry = idx;
                }

                first = retry;
        } while (retry >= 0);

        return 0;
}
108
/* Applies the requested O_NONBLOCK state to each descriptor in fds[] and
 * clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative value reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these
                 * descriptors are meant to be handed to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
135
136 static const char *tty_path(const ExecContext *context) {
137         assert(context);
138
139         if (context->tty_path)
140                 return context->tty_path;
141
142         return "/dev/console";
143 }
144
145 void exec_context_tty_reset(const ExecContext *context) {
146         assert(context);
147
148         if (context->tty_vhangup)
149                 terminal_vhangup(tty_path(context));
150
151         if (context->tty_reset)
152                 reset_terminal(tty_path(context));
153
154         if (context->tty_vt_disallocate && context->tty_path)
155                 vt_disallocate(context->tty_path);
156 }
157
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (fd == nfd)
                return nfd;

        r = nfd;
        if (dup2(fd, nfd) < 0)
                r = -errno;

        close_nointr_nofail(fd);
        return r;
}
174
175 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
176         int fd, r;
177         union sockaddr_union sa;
178
179         assert(context);
180         assert(output < _EXEC_OUTPUT_MAX);
181         assert(ident);
182         assert(nfd >= 0);
183
184         fd = socket(AF_UNIX, SOCK_STREAM, 0);
185         if (fd < 0)
186                 return -errno;
187
188         zero(sa);
189         sa.un.sun_family = AF_UNIX;
190         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
191
192         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
193         if (r < 0) {
194                 close_nointr_nofail(fd);
195                 return -errno;
196         }
197
198         if (shutdown(fd, SHUT_RD) < 0) {
199                 close_nointr_nofail(fd);
200                 return -errno;
201         }
202
203         dprintf(fd,
204                 "%s\n"
205                 "%i\n"
206                 "%i\n"
207                 "%i\n"
208                 "%i\n"
209                 "%i\n",
210                 context->syslog_identifier ? context->syslog_identifier : ident,
211                 context->syslog_priority,
212                 !!context->syslog_level_prefix,
213                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
214                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
215                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
216
217         if (fd != nfd) {
218                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
219                 close_nointr_nofail(fd);
220         } else
221                 r = nfd;
222
223         return r;
224 }

/* Opens the terminal at 'path' via open_terminal(), passing 'mode' with
 * O_NOCTTY added, and makes it available as descriptor nfd.
 *
 * Returns nfd on success; on failure either the negative value returned by
 * open_terminal() or a negative errno value from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        r = nfd;
        if (dup2(fd, nfd) < 0)
                r = -errno;

        close_nointr_nofail(fd);
        return r;
}
242
243 static bool is_terminal_input(ExecInput i) {
244         return
245                 i == EXEC_INPUT_TTY ||
246                 i == EXEC_INPUT_TTY_FORCE ||
247                 i == EXEC_INPUT_TTY_FAIL;
248 }
249
250 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
251
252         if (is_terminal_input(std_input) && !apply_tty_stdin)
253                 return EXEC_INPUT_NULL;
254
255         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
256                 return EXEC_INPUT_NULL;
257
258         return std_input;
259 }
260
261 static int fixup_output(ExecOutput std_output, int socket_fd) {
262
263         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
264                 return EXEC_OUTPUT_INHERIT;
265
266         return std_output;
267 }
268
269 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
270         ExecInput i;
271
272         assert(context);
273
274         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
275
276         switch (i) {
277
278         case EXEC_INPUT_NULL:
279                 return open_null_as(O_RDONLY, STDIN_FILENO);
280
281         case EXEC_INPUT_TTY:
282         case EXEC_INPUT_TTY_FORCE:
283         case EXEC_INPUT_TTY_FAIL: {
284                 int fd, r;
285
286                 if ((fd = acquire_terminal(
287                                      tty_path(context),
288                                      i == EXEC_INPUT_TTY_FAIL,
289                                      i == EXEC_INPUT_TTY_FORCE,
290                                      false)) < 0)
291                         return fd;
292
293                 if (fd != STDIN_FILENO) {
294                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
295                         close_nointr_nofail(fd);
296                 } else
297                         r = STDIN_FILENO;
298
299                 return r;
300         }
301
302         case EXEC_INPUT_SOCKET:
303                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
304
305         default:
306                 assert_not_reached("Unknown input type");
307         }
308 }
309
310 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
311         ExecOutput o;
312         ExecInput i;
313
314         assert(context);
315         assert(ident);
316
317         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
318         o = fixup_output(context->std_output, socket_fd);
319
320         /* This expects the input is already set up */
321
322         switch (o) {
323
324         case EXEC_OUTPUT_INHERIT:
325
326                 /* If input got downgraded, inherit the original value */
327                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
328                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
329
330                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
331                 if (i != EXEC_INPUT_NULL)
332                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
333
334                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
335                 if (getppid() != 1)
336                         return STDOUT_FILENO;
337
338                 /* We need to open /dev/null here anew, to get the
339                  * right access mode. So we fall through */
340
341         case EXEC_OUTPUT_NULL:
342                 return open_null_as(O_WRONLY, STDOUT_FILENO);
343
344         case EXEC_OUTPUT_TTY:
345                 if (is_terminal_input(i))
346                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
347
348                 /* We don't reset the terminal if this is just about output */
349                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
350
351         case EXEC_OUTPUT_SYSLOG:
352         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
353         case EXEC_OUTPUT_KMSG:
354         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
355         case EXEC_OUTPUT_JOURNAL:
356         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
357                 return connect_logger_as(context, o, ident, STDOUT_FILENO);
358
359         case EXEC_OUTPUT_SOCKET:
360                 assert(socket_fd >= 0);
361                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
362
363         default:
364                 assert_not_reached("Unknown output type");
365         }
366 }
367
368 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
369         ExecOutput o, e;
370         ExecInput i;
371
372         assert(context);
373         assert(ident);
374
375         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
376         o = fixup_output(context->std_output, socket_fd);
377         e = fixup_output(context->std_error, socket_fd);
378
379         /* This expects the input and output are already set up */
380
381         /* Don't change the stderr file descriptor if we inherit all
382          * the way and are not on a tty */
383         if (e == EXEC_OUTPUT_INHERIT &&
384             o == EXEC_OUTPUT_INHERIT &&
385             i == EXEC_INPUT_NULL &&
386             !is_terminal_input(context->std_input) &&
387             getppid () != 1)
388                 return STDERR_FILENO;
389
390         /* Duplicate from stdout if possible */
391         if (e == o || e == EXEC_OUTPUT_INHERIT)
392                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
393
394         switch (e) {
395
396         case EXEC_OUTPUT_NULL:
397                 return open_null_as(O_WRONLY, STDERR_FILENO);
398
399         case EXEC_OUTPUT_TTY:
400                 if (is_terminal_input(i))
401                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
402
403                 /* We don't reset the terminal if this is just about output */
404                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
405
406         case EXEC_OUTPUT_SYSLOG:
407         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
408         case EXEC_OUTPUT_KMSG:
409         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
410         case EXEC_OUTPUT_JOURNAL:
411         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
412                 return connect_logger_as(context, e, ident, STDERR_FILENO);
413
414         case EXEC_OUTPUT_SOCKET:
415                 assert(socket_fd >= 0);
416                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
417
418         default:
419                 assert_not_reached("Unknown error type");
420         }
421 }
422
423 static int chown_terminal(int fd, uid_t uid) {
424         struct stat st;
425
426         assert(fd >= 0);
427
428         /* This might fail. What matters are the results. */
429         (void) fchown(fd, uid, -1);
430         (void) fchmod(fd, TTY_MODE);
431
432         if (fstat(fd, &st) < 0)
433                 return -errno;
434
435         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
436                 return -EPERM;
437
438         return 0;
439 }
440
441 static int setup_confirm_stdio(const ExecContext *context,
442                                int *_saved_stdin,
443                                int *_saved_stdout) {
444         int fd = -1, saved_stdin, saved_stdout = -1, r;
445
446         assert(context);
447         assert(_saved_stdin);
448         assert(_saved_stdout);
449
450         /* This returns positive EXIT_xxx return values instead of
451          * negative errno style values! */
452
453         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
454                 return EXIT_STDIN;
455
456         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
457                 r = EXIT_STDOUT;
458                 goto fail;
459         }
460
461         if ((fd = acquire_terminal(
462                              tty_path(context),
463                              context->std_input == EXEC_INPUT_TTY_FAIL,
464                              context->std_input == EXEC_INPUT_TTY_FORCE,
465                              false)) < 0) {
466                 r = EXIT_STDIN;
467                 goto fail;
468         }
469
470         if (chown_terminal(fd, getuid()) < 0) {
471                 r = EXIT_STDIN;
472                 goto fail;
473         }
474
475         if (dup2(fd, STDIN_FILENO) < 0) {
476                 r = EXIT_STDIN;
477                 goto fail;
478         }
479
480         if (dup2(fd, STDOUT_FILENO) < 0) {
481                 r = EXIT_STDOUT;
482                 goto fail;
483         }
484
485         if (fd >= 2)
486                 close_nointr_nofail(fd);
487
488         *_saved_stdin = saved_stdin;
489         *_saved_stdout = saved_stdout;
490
491         return 0;
492
493 fail:
494         if (saved_stdout >= 0)
495                 close_nointr_nofail(saved_stdout);
496
497         if (saved_stdin >= 0)
498                 close_nointr_nofail(saved_stdin);
499
500         if (fd >= 0)
501                 close_nointr_nofail(fd);
502
503         return r;
504 }
505
506 static int restore_confirm_stdio(const ExecContext *context,
507                                  int *saved_stdin,
508                                  int *saved_stdout,
509                                  bool *keep_stdin,
510                                  bool *keep_stdout) {
511
512         assert(context);
513         assert(saved_stdin);
514         assert(*saved_stdin >= 0);
515         assert(saved_stdout);
516         assert(*saved_stdout >= 0);
517
518         /* This returns positive EXIT_xxx return values instead of
519          * negative errno style values! */
520
521         if (is_terminal_input(context->std_input)) {
522
523                 /* The service wants terminal input. */
524
525                 *keep_stdin = true;
526                 *keep_stdout =
527                         context->std_output == EXEC_OUTPUT_INHERIT ||
528                         context->std_output == EXEC_OUTPUT_TTY;
529
530         } else {
531                 /* If the service doesn't want a controlling terminal,
532                  * then we need to get rid entirely of what we have
533                  * already. */
534
535                 if (release_terminal() < 0)
536                         return EXIT_STDIN;
537
538                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
539                         return EXIT_STDIN;
540
541                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
542                         return EXIT_STDOUT;
543
544                 *keep_stdout = *keep_stdin = false;
545         }
546
547         return 0;
548 }
549
550 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
551         bool keep_groups = false;
552         int r;
553
554         assert(context);
555
556         /* Lookup and set GID and supplementary group list. Here too
557          * we avoid NSS lookups for gid=0. */
558
559         if (context->group || username) {
560
561                 if (context->group) {
562                         const char *g = context->group;
563
564                         if ((r = get_group_creds(&g, &gid)) < 0)
565                                 return r;
566                 }
567
568                 /* First step, initialize groups from /etc/groups */
569                 if (username && gid != 0) {
570                         if (initgroups(username, gid) < 0)
571                                 return -errno;
572
573                         keep_groups = true;
574                 }
575
576                 /* Second step, set our gids */
577                 if (setresgid(gid, gid, gid) < 0)
578                         return -errno;
579         }
580
581         if (context->supplementary_groups) {
582                 int ngroups_max, k;
583                 gid_t *gids;
584                 char **i;
585
586                 /* Final step, initialize any manually set supplementary groups */
587                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
588
589                 if (!(gids = new(gid_t, ngroups_max)))
590                         return -ENOMEM;
591
592                 if (keep_groups) {
593                         if ((k = getgroups(ngroups_max, gids)) < 0) {
594                                 free(gids);
595                                 return -errno;
596                         }
597                 } else
598                         k = 0;
599
600                 STRV_FOREACH(i, context->supplementary_groups) {
601                         const char *g;
602
603                         if (k >= ngroups_max) {
604                                 free(gids);
605                                 return -E2BIG;
606                         }
607
608                         g = *i;
609                         r = get_group_creds(&g, gids+k);
610                         if (r < 0) {
611                                 free(gids);
612                                 return r;
613                         }
614
615                         k++;
616                 }
617
618                 if (setgroups(k, gids) < 0) {
619                         free(gids);
620                         return -errno;
621                 }
622
623                 free(gids);
624         }
625
626         return 0;
627 }
628
629 static int enforce_user(const ExecContext *context, uid_t uid) {
630         int r;
631         assert(context);
632
633         /* Sets (but doesn't lookup) the uid and make sure we keep the
634          * capabilities while doing so. */
635
636         if (context->capabilities) {
637                 cap_t d;
638                 static const cap_value_t bits[] = {
639                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
640                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
641                 };
642
643                 /* First step: If we need to keep capabilities but
644                  * drop privileges we need to make sure we keep our
645                  * caps, whiel we drop privileges. */
646                 if (uid != 0) {
647                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
648
649                         if (prctl(PR_GET_SECUREBITS) != sb)
650                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
651                                         return -errno;
652                 }
653
654                 /* Second step: set the capabilities. This will reduce
655                  * the capabilities to the minimum we need. */
656
657                 if (!(d = cap_dup(context->capabilities)))
658                         return -errno;
659
660                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
661                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
662                         r = -errno;
663                         cap_free(d);
664                         return r;
665                 }
666
667                 if (cap_set_proc(d) < 0) {
668                         r = -errno;
669                         cap_free(d);
670                         return r;
671                 }
672
673                 cap_free(d);
674         }
675
676         /* Third step: actually set the uids */
677         if (setresuid(uid, uid, uid) < 0)
678                 return -errno;
679
680         /* At this point we should have all necessary capabilities but
681            are otherwise a normal user. However, the caps might got
682            corrupted due to the setresuid() so we need clean them up
683            later. This is done outside of this call. */
684
685         return 0;
686 }
687
688 #ifdef HAVE_PAM
689
690 static int null_conv(
691                 int num_msg,
692                 const struct pam_message **msg,
693                 struct pam_response **resp,
694                 void *appdata_ptr) {
695
696         /* We don't support conversations */
697
698         return PAM_CONV_ERR;
699 }
700
701 static int setup_pam(
702                 const char *name,
703                 const char *user,
704                 const char *tty,
705                 char ***pam_env,
706                 int fds[], unsigned n_fds) {
707
708         static const struct pam_conv conv = {
709                 .conv = null_conv,
710                 .appdata_ptr = NULL
711         };
712
713         pam_handle_t *handle = NULL;
714         sigset_t ss, old_ss;
715         int pam_code = PAM_SUCCESS;
716         int err;
717         char **e = NULL;
718         bool close_session = false;
719         pid_t pam_pid = 0, parent_pid;
720
721         assert(name);
722         assert(user);
723         assert(pam_env);
724
725         /* We set up PAM in the parent process, then fork. The child
726          * will then stay around until killed via PR_GET_PDEATHSIG or
727          * systemd via the cgroup logic. It will then remove the PAM
728          * session again. The parent process will exec() the actual
729          * daemon. We do things this way to ensure that the main PID
730          * of the daemon is the one we initially fork()ed. */
731
732         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
733                 handle = NULL;
734                 goto fail;
735         }
736
737         if (tty)
738                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
739                         goto fail;
740
741         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
742                 goto fail;
743
744         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
745                 goto fail;
746
747         close_session = true;
748
749         if ((!(e = pam_getenvlist(handle)))) {
750                 pam_code = PAM_BUF_ERR;
751                 goto fail;
752         }
753
754         /* Block SIGTERM, so that we know that it won't get lost in
755          * the child */
756         if (sigemptyset(&ss) < 0 ||
757             sigaddset(&ss, SIGTERM) < 0 ||
758             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
759                 goto fail;
760
761         parent_pid = getpid();
762
763         if ((pam_pid = fork()) < 0)
764                 goto fail;
765
766         if (pam_pid == 0) {
767                 int sig;
768                 int r = EXIT_PAM;
769
770                 /* The child's job is to reset the PAM session on
771                  * termination */
772
773                 /* This string must fit in 10 chars (i.e. the length
774                  * of "/sbin/init"), to look pretty in /bin/ps */
775                 rename_process("(sd-pam)");
776
777                 /* Make sure we don't keep open the passed fds in this
778                 child. We assume that otherwise only those fds are
779                 open here that have been opened by PAM. */
780                 close_many(fds, n_fds);
781
782                 /* Wait until our parent died. This will most likely
783                  * not work since the kernel does not allow
784                  * unprivileged parents kill their privileged children
785                  * this way. We rely on the control groups kill logic
786                  * to do the rest for us. */
787                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
788                         goto child_finish;
789
790                 /* Check if our parent process might already have
791                  * died? */
792                 if (getppid() == parent_pid) {
793                         for (;;) {
794                                 if (sigwait(&ss, &sig) < 0) {
795                                         if (errno == EINTR)
796                                                 continue;
797
798                                         goto child_finish;
799                                 }
800
801                                 assert(sig == SIGTERM);
802                                 break;
803                         }
804                 }
805
806                 /* If our parent died we'll end the session */
807                 if (getppid() != parent_pid)
808                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
809                                 goto child_finish;
810
811                 r = 0;
812
813         child_finish:
814                 pam_end(handle, pam_code | PAM_DATA_SILENT);
815                 _exit(r);
816         }
817
818         /* If the child was forked off successfully it will do all the
819          * cleanups, so forget about the handle here. */
820         handle = NULL;
821
822         /* Unblock SIGTERM again in the parent */
823         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
824                 goto fail;
825
826         /* We close the log explicitly here, since the PAM modules
827          * might have opened it, but we don't want this fd around. */
828         closelog();
829
830         *pam_env = e;
831         e = NULL;
832
833         return 0;
834
835 fail:
836         if (pam_code != PAM_SUCCESS)
837                 err = -EPERM;  /* PAM errors do not map to errno */
838         else
839                 err = -errno;
840
841         if (handle) {
842                 if (close_session)
843                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
844
845                 pam_end(handle, pam_code | PAM_DATA_SILENT);
846         }
847
848         strv_free(e);
849
850         closelog();
851
852         if (pam_pid > 1) {
853                 kill(pam_pid, SIGTERM);
854                 kill(pam_pid, SIGCONT);
855         }
856
857         return err;
858 }
859 #endif
860
861 static int do_capability_bounding_set_drop(uint64_t drop) {
862         unsigned long i;
863         cap_t old_cap = NULL, new_cap = NULL;
864         cap_flag_value_t fv;
865         int r;
866
867         /* If we are run as PID 1 we will lack CAP_SETPCAP by default
868          * in the effective set (yes, the kernel drops that when
869          * executing init!), so get it back temporarily so that we can
870          * call PR_CAPBSET_DROP. */
871
872         old_cap = cap_get_proc();
873         if (!old_cap)
874                 return -errno;
875
876         if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
877                 r = -errno;
878                 goto finish;
879         }
880
881         if (fv != CAP_SET) {
882                 static const cap_value_t v = CAP_SETPCAP;
883
884                 new_cap = cap_dup(old_cap);
885                 if (!new_cap) {
886                         r = -errno;
887                         goto finish;
888                 }
889
890                 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
891                         r = -errno;
892                         goto finish;
893                 }
894
895                 if (cap_set_proc(new_cap) < 0) {
896                         r = -errno;
897                         goto finish;
898                 }
899         }
900
901         for (i = 0; i <= cap_last_cap(); i++)
902                 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
903                         if (prctl(PR_CAPBSET_DROP, i) < 0) {
904                                 r = -errno;
905                                 goto finish;
906                         }
907                 }
908
909         r = 0;
910
911 finish:
912         if (new_cap)
913                 cap_free(new_cap);
914
915         if (old_cap) {
916                 cap_set_proc(old_cap);
917                 cap_free(old_cap);
918         }
919
920         return r;
921 }
922
/* Renames the process to "(<name>)", where <name> is the file name part of
 * 'path' cut down to its last 8 characters. If there is no file name part
 * the process is renamed to "(...)" instead. */
static void rename_process_from_path(const char *path) {
        /* '(' + up to 8 characters + ')' + NUL. The result has to fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps. */
        char buf[11];
        const char *base;
        size_t n;

        base = file_name_from_path(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail: it is usually the more interesting part,
                 * as the beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
953
/* Forks off a child process and executes 'command' in it, after
 * setting up the execution environment described by 'context'.
 *
 * command:           the command to run; command->path is what gets
 *                    passed to execve(), and command->exec_status is
 *                    updated in the parent via exec_status_start().
 * argv:              argument vector; if NULL, command->argv is used.
 *                    It is run through replace_env_argv() with the
 *                    final environment before execve().
 * context:           the execution settings to apply in the child.
 * fds, n_fds:        file descriptors to hand to the child. If any of
 *                    stdin/stdout/stderr is configured as a socket,
 *                    exactly one fd must be passed (otherwise -EINVAL
 *                    is returned); it is then used as the socket for
 *                    the standard streams and no fds are passed on.
 * environment:       merged with the variables generated here
 *                    (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME, USER and
 *                    the string from default_term_for_tty()),
 *                    context->environment, the loaded environment
 *                    files and the PAM environment.
 * apply_permissions: if true, groups, resource limits, the capability
 *                    bounding set, the user, the secure bits and the
 *                    capabilities are applied.
 * apply_chroot:      if true, chroot() into context->root_directory
 *                    (if set) and chdir() into the working directory
 *                    (or "/"); if false, only chdir() into
 *                    "<root_directory>/<working_directory>".
 * apply_tty_stdin:   forwarded to setup_input()/setup_output()/
 *                    setup_error(); if false and stdin is a terminal
 *                    input, the confirmation step is skipped.
 * confirm_spawn:     if true, ask interactively whether to execute.
 * cgroup_bondings:   realized in the parent before forking, then
 *                    installed both in the child and in the parent.
 * cgroup_attributes: applied via cgroup_attribute_apply_list().
 * ret:               receives the PID of the forked child on success.
 *
 * Returns 0 in the parent on success and a negative errno-style value
 * if something fails before or at fork(). The child never returns
 * from this function: it either execve()s, or _exit()s with the
 * EXIT_* code of the step that failed (0 if the user chose to skip
 * the command at the confirmation prompt). */
int exec_spawn(ExecCommand *command,
               char **argv,
               const ExecContext *context,
               int fds[], unsigned n_fds,
               char **environment,
               bool apply_permissions,
               bool apply_chroot,
               bool apply_tty_stdin,
               bool confirm_spawn,
               CGroupBonding *cgroup_bondings,
               CGroupAttribute *cgroup_attributes,
               pid_t *ret) {

        pid_t pid;
        int r;
        char *line;
        int socket_fd;
        char **files_env = NULL;

        assert(command);
        assert(context);
        assert(ret);
        assert(fds || n_fds <= 0);

        /* If one of the standard streams shall be connected to a
         * socket, the single fd passed in is that socket and is not
         * treated as one of the fds to pass on. */
        if (context->std_input == EXEC_INPUT_SOCKET ||
            context->std_output == EXEC_OUTPUT_SOCKET ||
            context->std_error == EXEC_OUTPUT_SOCKET) {

                if (n_fds != 1)
                        return -EINVAL;

                socket_fd = fds[0];

                fds = NULL;
                n_fds = 0;
        } else
                socket_fd = -1;

        if ((r = exec_context_load_environment(context, &files_env)) < 0) {
                log_error("Failed to load environment files: %s", strerror(-r));
                return r;
        }

        if (!argv)
                argv = command->argv;

        /* The command line string is only needed for the debug
         * message below and is released right away. */
        if (!(line = exec_command_line(argv))) {
                r = -ENOMEM;
                goto fail_parent;
        }

        log_debug("About to execute: %s", line);
        free(line);

        r = cgroup_bonding_realize_list(cgroup_bondings);
        if (r < 0)
                goto fail_parent;

        cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);

        if ((pid = fork()) < 0) {
                r = -errno;
                goto fail_parent;
        }

        if (pid == 0) {
                /* In the child, 'r' carries the EXIT_* status to
                 * _exit() with and 'err' the negative errno-style
                 * error that is logged at fail_child. */
                int i, err;
                sigset_t ss;
                const char *username = NULL, *home = NULL;
                uid_t uid = (uid_t) -1;
                gid_t gid = (gid_t) -1;
                char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
                unsigned n_env = 0;
                int saved_stdout = -1, saved_stdin = -1;
                bool keep_stdout = false, keep_stdin = false, set_access = false;

                /* child */

                rename_process_from_path(command->path);

                /* We reset exactly these signals, since they are the
                 * only ones we set to SIG_IGN in the main daemon. All
                 * others we leave untouched because we set them to
                 * SIG_DFL or a valid handler initially, both of which
                 * will be demoted to SIG_DFL. */
                default_signals(SIGNALS_CRASH_HANDLER,
                                SIGNALS_IGNORE, -1);

                if (context->ignore_sigpipe)
                        ignore_signals(SIGPIPE, -1);

                /* Start the child with an empty signal mask */
                assert_se(sigemptyset(&ss) == 0);
                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                        err = -errno;
                        r = EXIT_SIGNAL_MASK;
                        goto fail_child;
                }

                /* Close sockets very early to make sure we don't
                 * block init reexecution because it cannot bind its
                 * sockets */
                log_forget_fds();
                err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
                                           socket_fd >= 0 ? 1 : n_fds);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (!context->same_pgrp)
                        if (setsid() < 0) {
                                err = -errno;
                                r = EXIT_SETSID;
                                goto fail_child;
                        }

                /* Check the socket and every passed fd with
                 * socket_tcpwrap(); a single refusal aborts the
                 * spawn. */
                if (context->tcpwrap_name) {
                        if (socket_fd >= 0)
                                if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }

                        for (i = 0; i < (int) n_fds; i++) {
                                if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }
                        }
                }

                exec_context_tty_reset(context);

                /* We skip the confirmation step if we shall not apply the TTY */
                if (confirm_spawn &&
                    (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
                        char response;

                        /* Set up terminal for the question */
                        if ((r = setup_confirm_stdio(context,
                                                     &saved_stdin, &saved_stdout))) {
                                err = -errno;
                                goto fail_child;
                        }

                        /* Now ask the question. */
                        if (!(line = exec_command_line(argv))) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                        r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
                        free(line);

                        if (r < 0 || response == 'n') {
                                err = -ECANCELED;
                                r = EXIT_CONFIRM;
                                goto fail_child;
                        } else if (response == 's') {
                                /* Skipping is not a failure: with
                                 * r == 0 nothing is logged at
                                 * fail_child and we _exit(0). */
                                err = r = 0;
                                goto fail_child;
                        }

                        /* Release terminal for the question */
                        if ((r = restore_confirm_stdio(context,
                                                       &saved_stdin, &saved_stdout,
                                                       &keep_stdin, &keep_stdout))) {
                                err = -errno;
                                goto fail_child;
                        }
                }

                /* If a socket is connected to STDIN/STDOUT/STDERR, we
                 * must be sure to drop O_NONBLOCK */
                if (socket_fd >= 0)
                        fd_nonblock(socket_fd, false);

                if (!keep_stdin) {
                        err = setup_input(context, socket_fd, apply_tty_stdin);
                        if (err < 0) {
                                r = EXIT_STDIN;
                                goto fail_child;
                        }
                }

                if (!keep_stdout) {
                        err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
                        if (err < 0) {
                                r = EXIT_STDOUT;
                                goto fail_child;
                        }
                }

                err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDERR;
                        goto fail_child;
                }

                /* Add ourselves (pid 0) to the cgroups; the parent
                 * repeats this with our actual PID after the fork. */
                if (cgroup_bondings) {
                        err = cgroup_bonding_install_list(cgroup_bondings, 0);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (context->oom_score_adjust_set) {
                        char t[16];

                        snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
                        char_array_0(t);

                        if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
                                /* Compatibility with Linux <= 2.6.35 */

                                int adj;

                                /* Scale the value to the range of the
                                 * older oom_adj interface */
                                adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
                                adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);

                                snprintf(t, sizeof(t), "%i", adj);
                                char_array_0(t);

                                /* EACCES is tolerated here, every
                                 * other error is fatal */
                                if (write_one_line_file("/proc/self/oom_adj", t) < 0
                                    && errno != EACCES) {
                                        err = -errno;
                                        r = EXIT_OOM_ADJUST;
                                        goto fail_child;
                                }
                        }
                }

                if (context->nice_set)
                        if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
                                err = -errno;
                                r = EXIT_NICE;
                                goto fail_child;
                        }

                if (context->cpu_sched_set) {
                        struct sched_param param;

                        zero(param);
                        param.sched_priority = context->cpu_sched_priority;

                        if (sched_setscheduler(0, context->cpu_sched_policy |
                                               (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
                                err = -errno;
                                r = EXIT_SETSCHEDULER;
                                goto fail_child;
                        }
                }

                if (context->cpuset)
                        if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
                                err = -errno;
                                r = EXIT_CPUAFFINITY;
                                goto fail_child;
                        }

                if (context->ioprio_set)
                        if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
                                err = -errno;
                                r = EXIT_IOPRIO;
                                goto fail_child;
                        }

                if (context->timer_slack_nsec_set)
                        if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
                                err = -errno;
                                r = EXIT_TIMERSLACK;
                                goto fail_child;
                        }

                if (context->utmp_id)
                        utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);

                /* Resolve the configured user now; the credentials
                 * are needed below for the terminal, the cgroup
                 * access mode, the groups, PAM and the environment. */
                if (context->user) {
                        username = context->user;
                        err = get_user_creds(&username, &uid, &gid, &home);
                        if (err < 0) {
                                r = EXIT_USER;
                                goto fail_child;
                        }

                        if (is_terminal_input(context->std_input)) {
                                err = chown_terminal(STDIN_FILENO, uid);
                                if (err < 0) {
                                        r = EXIT_STDIN;
                                        goto fail_child;
                                }
                        }

                        if (cgroup_bondings && context->control_group_modify) {
                                err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
                                if (err >= 0)
                                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
                                if (err < 0) {
                                        r = EXIT_CGROUP;
                                        goto fail_child;
                                }

                                set_access = true;
                        }
                }

                /* If the task access was not set above, still pass
                 * on the persistence setting, leaving mode and
                 * ownership unspecified (-1). */
                if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (apply_permissions) {
                        err = enforce_groups(context, username, gid);
                        if (err < 0) {
                                r = EXIT_GROUP;
                                goto fail_child;
                        }
                }

                umask(context->umask);

#ifdef HAVE_PAM
                if (context->pam_name && username) {
                        err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
                        if (err < 0) {
                                r = EXIT_PAM;
                                goto fail_child;
                        }
                }
#endif
                if (context->private_network) {
                        if (unshare(CLONE_NEWNET) < 0) {
                                err = -errno;
                                r = EXIT_NETWORK;
                                goto fail_child;
                        }

                        loopback_setup();
                }

                /* Only set up a namespace if at least one of the
                 * namespace related settings deviates from the
                 * defaults */
                if (strv_length(context->read_write_dirs) > 0 ||
                    strv_length(context->read_only_dirs) > 0 ||
                    strv_length(context->inaccessible_dirs) > 0 ||
                    context->mount_flags != MS_SHARED ||
                    context->private_tmp) {
                        err = setup_namespace(context->read_write_dirs,
                                              context->read_only_dirs,
                                              context->inaccessible_dirs,
                                              context->private_tmp,
                                              context->mount_flags);
                        if (err < 0) {
                                r = EXIT_NAMESPACE;
                                goto fail_child;
                        }
                }

                if (apply_chroot) {
                        if (context->root_directory)
                                if (chroot(context->root_directory) < 0) {
                                        err = -errno;
                                        r = EXIT_CHROOT;
                                        goto fail_child;
                                }

                        if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                } else {

                        /* Without chroot() we change into the working
                         * directory prefixed with the root directory */
                        char *d;

                        if (asprintf(&d, "%s/%s",
                                     context->root_directory ? context->root_directory : "",
                                     context->working_directory ? context->working_directory : "") < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                        if (chdir(d) < 0) {
                                err = -errno;
                                free(d);
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }

                        free(d);
                }

                /* We repeat the fd closing here, to make sure that
                 * nothing is leaked from the PAM modules */
                err = close_all_fds(fds, n_fds);
                if (err >= 0)
                        err = shift_fds(fds, n_fds);
                if (err >= 0)
                        err = flags_fds(fds, n_fds, context->non_blocking);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (apply_permissions) {

                        for (i = 0; i < RLIMIT_NLIMITS; i++) {
                                if (!context->rlimit[i])
                                        continue;

                                if (setrlimit_closest(i, context->rlimit[i]) < 0) {
                                        err = -errno;
                                        r = EXIT_LIMITS;
                                        goto fail_child;
                                }
                        }

                        if (context->capability_bounding_set_drop) {
                                err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
                                if (err < 0) {
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }
                        }

                        if (context->user) {
                                err = enforce_user(context, uid);
                                if (err < 0) {
                                        r = EXIT_USER;
                                        goto fail_child;
                                }
                        }

                        /* PR_GET_SECUREBITS is not privileged, while
                         * PR_SET_SECUREBITS is. So to suppress
                         * potential EPERMs we'll try not to call
                         * PR_SET_SECUREBITS unless necessary. */
                        if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
                                if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
                                        err = -errno;
                                        r = EXIT_SECUREBITS;
                                        goto fail_child;
                                }

                        if (context->capabilities)
                                if (cap_set_proc(context->capabilities) < 0) {
                                        err = -errno;
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }
                }

                /* Room for the at most six entries generated below
                 * plus the terminating NULL */
                if (!(our_env = new0(char*, 7))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (n_fds > 0)
                        if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
                            asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (home)
                        if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (username)
                        if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
                            asprintf(our_env + n_env++, "USER=%s", username) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (is_terminal_input(context->std_input) ||
                    context->std_output == EXEC_OUTPUT_TTY ||
                    context->std_error == EXEC_OUTPUT_TTY)
                        if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                assert(n_env <= 7);

                /* Combine the five environment sources into the
                 * environment block handed to execve() */
                if (!(final_env = strv_env_merge(
                                      5,
                                      environment,
                                      our_env,
                                      context->environment,
                                      files_env,
                                      pam_env,
                                      NULL))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (!(final_argv = replace_env_argv(argv, final_env))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                final_env = strv_env_clean(final_env);

                /* execve() only returns on failure, in which case we
                 * fall through into the error path below */
                execve(command->path, final_argv, final_env);
                err = -errno;
                r = EXIT_EXEC;

        fail_child:
                if (r != 0) {
                        log_open();
                        log_warning("Failed at step %s spawning %s: %s",
                                    exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
                                    command->path, strerror(-err));
                }

                strv_free(our_env);
                strv_free(final_env);
                strv_free(pam_env);
                strv_free(files_env);
                strv_free(final_argv);

                if (saved_stdin >= 0)
                        close_nointr_nofail(saved_stdin);

                if (saved_stdout >= 0)
                        close_nointr_nofail(saved_stdout);

                _exit(r);
        }

        /* parent */

        strv_free(files_env);

        /* We add the new process to the cgroup both in the child (so
         * that we can be sure that no user code is ever executed
         * outside of the cgroup) and in the parent (so that we can be
         * sure that when we kill the cgroup the process will be
         * killed too). */
        if (cgroup_bondings)
                cgroup_bonding_install_list(cgroup_bondings, pid);

        log_debug("Forked %s as %lu", command->path, (unsigned long) pid);

        exec_status_start(&command->exec_status, pid);

        *ret = pid;
        return 0;

fail_parent:
        strv_free(files_env);

        return r;
}
1522
1523 void exec_context_init(ExecContext *c) {
1524         assert(c);
1525
1526         c->umask = 0022;
1527         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1528         c->cpu_sched_policy = SCHED_OTHER;
1529         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1530         c->syslog_level_prefix = true;
1531         c->mount_flags = MS_SHARED;
1532         c->kill_signal = SIGTERM;
1533         c->send_sigkill = true;
1534         c->control_group_persistent = -1;
1535         c->ignore_sigpipe = true;
1536 }
1537
1538 void exec_context_done(ExecContext *c) {
1539         unsigned l;
1540
1541         assert(c);
1542
1543         strv_free(c->environment);
1544         c->environment = NULL;
1545
1546         strv_free(c->environment_files);
1547         c->environment_files = NULL;
1548
1549         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1550                 free(c->rlimit[l]);
1551                 c->rlimit[l] = NULL;
1552         }
1553
1554         free(c->working_directory);
1555         c->working_directory = NULL;
1556         free(c->root_directory);
1557         c->root_directory = NULL;
1558
1559         free(c->tty_path);
1560         c->tty_path = NULL;
1561
1562         free(c->tcpwrap_name);
1563         c->tcpwrap_name = NULL;
1564
1565         free(c->syslog_identifier);
1566         c->syslog_identifier = NULL;
1567
1568         free(c->user);
1569         c->user = NULL;
1570
1571         free(c->group);
1572         c->group = NULL;
1573
1574         strv_free(c->supplementary_groups);
1575         c->supplementary_groups = NULL;
1576
1577         free(c->pam_name);
1578         c->pam_name = NULL;
1579
1580         if (c->capabilities) {
1581                 cap_free(c->capabilities);
1582                 c->capabilities = NULL;
1583         }
1584
1585         strv_free(c->read_only_dirs);
1586         c->read_only_dirs = NULL;
1587
1588         strv_free(c->read_write_dirs);
1589         c->read_write_dirs = NULL;
1590
1591         strv_free(c->inaccessible_dirs);
1592         c->inaccessible_dirs = NULL;
1593
1594         if (c->cpuset)
1595                 CPU_FREE(c->cpuset);
1596
1597         free(c->utmp_id);
1598         c->utmp_id = NULL;
1599 }
1600
1601 void exec_command_done(ExecCommand *c) {
1602         assert(c);
1603
1604         free(c->path);
1605         c->path = NULL;
1606
1607         strv_free(c->argv);
1608         c->argv = NULL;
1609 }
1610
1611 void exec_command_done_array(ExecCommand *c, unsigned n) {
1612         unsigned i;
1613
1614         for (i = 0; i < n; i++)
1615                 exec_command_done(c+i);
1616 }
1617
1618 void exec_command_free_list(ExecCommand *c) {
1619         ExecCommand *i;
1620
1621         while ((i = c)) {
1622                 LIST_REMOVE(ExecCommand, command, c, i);
1623                 exec_command_done(i);
1624                 free(i);
1625         }
1626 }
1627
1628 void exec_command_free_array(ExecCommand **c, unsigned n) {
1629         unsigned i;
1630
1631         for (i = 0; i < n; i++) {
1632                 exec_command_free_list(c[i]);
1633                 c[i] = NULL;
1634         }
1635 }
1636
1637 int exec_context_load_environment(const ExecContext *c, char ***l) {
1638         char **i, **r = NULL;
1639
1640         assert(c);
1641         assert(l);
1642
1643         STRV_FOREACH(i, c->environment_files) {
1644                 char *fn;
1645                 int k;
1646                 bool ignore = false;
1647                 char **p;
1648
1649                 fn = *i;
1650
1651                 if (fn[0] == '-') {
1652                         ignore = true;
1653                         fn ++;
1654                 }
1655
1656                 if (!path_is_absolute(fn)) {
1657
1658                         if (ignore)
1659                                 continue;
1660
1661                         strv_free(r);
1662                         return -EINVAL;
1663                 }
1664
1665                 if ((k = load_env_file(fn, &p)) < 0) {
1666
1667                         if (ignore)
1668                                 continue;
1669
1670                         strv_free(r);
1671                         return k;
1672                 }
1673
1674                 if (r == NULL)
1675                         r = p;
1676                 else {
1677                         char **m;
1678
1679                         m = strv_env_merge(2, r, p);
1680                         strv_free(r);
1681                         strv_free(p);
1682
1683                         if (!m)
1684                                 return -ENOMEM;
1685
1686                         r = m;
1687                 }
1688         }
1689
1690         *l = r;
1691
1692         return 0;
1693 }
1694
1695 static void strv_fprintf(FILE *f, char **l) {
1696         char **g;
1697
1698         assert(f);
1699
1700         STRV_FOREACH(g, l)
1701                 fprintf(f, " %s", *g);
1702 }
1703
1704 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1705         char ** e;
1706         unsigned i;
1707
1708         assert(c);
1709         assert(f);
1710
1711         if (!prefix)
1712                 prefix = "";
1713
1714         fprintf(f,
1715                 "%sUMask: %04o\n"
1716                 "%sWorkingDirectory: %s\n"
1717                 "%sRootDirectory: %s\n"
1718                 "%sNonBlocking: %s\n"
1719                 "%sPrivateTmp: %s\n"
1720                 "%sControlGroupModify: %s\n"
1721                 "%sControlGroupPersistent: %s\n"
1722                 "%sPrivateNetwork: %s\n",
1723                 prefix, c->umask,
1724                 prefix, c->working_directory ? c->working_directory : "/",
1725                 prefix, c->root_directory ? c->root_directory : "/",
1726                 prefix, yes_no(c->non_blocking),
1727                 prefix, yes_no(c->private_tmp),
1728                 prefix, yes_no(c->control_group_modify),
1729                 prefix, yes_no(c->control_group_persistent),
1730                 prefix, yes_no(c->private_network));
1731
1732         STRV_FOREACH(e, c->environment)
1733                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1734
1735         STRV_FOREACH(e, c->environment_files)
1736                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1737
1738         if (c->tcpwrap_name)
1739                 fprintf(f,
1740                         "%sTCPWrapName: %s\n",
1741                         prefix, c->tcpwrap_name);
1742
1743         if (c->nice_set)
1744                 fprintf(f,
1745                         "%sNice: %i\n",
1746                         prefix, c->nice);
1747
1748         if (c->oom_score_adjust_set)
1749                 fprintf(f,
1750                         "%sOOMScoreAdjust: %i\n",
1751                         prefix, c->oom_score_adjust);
1752
1753         for (i = 0; i < RLIM_NLIMITS; i++)
1754                 if (c->rlimit[i])
1755                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1756
1757         if (c->ioprio_set)
1758                 fprintf(f,
1759                         "%sIOSchedulingClass: %s\n"
1760                         "%sIOPriority: %i\n",
1761                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1762                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1763
1764         if (c->cpu_sched_set)
1765                 fprintf(f,
1766                         "%sCPUSchedulingPolicy: %s\n"
1767                         "%sCPUSchedulingPriority: %i\n"
1768                         "%sCPUSchedulingResetOnFork: %s\n",
1769                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1770                         prefix, c->cpu_sched_priority,
1771                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1772
1773         if (c->cpuset) {
1774                 fprintf(f, "%sCPUAffinity:", prefix);
1775                 for (i = 0; i < c->cpuset_ncpus; i++)
1776                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1777                                 fprintf(f, " %i", i);
1778                 fputs("\n", f);
1779         }
1780
1781         if (c->timer_slack_nsec_set)
1782                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1783
1784         fprintf(f,
1785                 "%sStandardInput: %s\n"
1786                 "%sStandardOutput: %s\n"
1787                 "%sStandardError: %s\n",
1788                 prefix, exec_input_to_string(c->std_input),
1789                 prefix, exec_output_to_string(c->std_output),
1790                 prefix, exec_output_to_string(c->std_error));
1791
1792         if (c->tty_path)
1793                 fprintf(f,
1794                         "%sTTYPath: %s\n"
1795                         "%sTTYReset: %s\n"
1796                         "%sTTYVHangup: %s\n"
1797                         "%sTTYVTDisallocate: %s\n",
1798                         prefix, c->tty_path,
1799                         prefix, yes_no(c->tty_reset),
1800                         prefix, yes_no(c->tty_vhangup),
1801                         prefix, yes_no(c->tty_vt_disallocate));
1802
1803         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1804             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1805             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1806             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1807                 fprintf(f,
1808                         "%sSyslogFacility: %s\n"
1809                         "%sSyslogLevel: %s\n",
1810                         prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1811                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1812
1813         if (c->capabilities) {
1814                 char *t;
1815                 if ((t = cap_to_text(c->capabilities, NULL))) {
1816                         fprintf(f, "%sCapabilities: %s\n",
1817                                 prefix, t);
1818                         cap_free(t);
1819                 }
1820         }
1821
1822         if (c->secure_bits)
1823                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1824                         prefix,
1825                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1826                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1827                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1828                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1829                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1830                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1831
1832         if (c->capability_bounding_set_drop) {
1833                 unsigned long l;
1834                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1835
1836                 for (l = 0; l <= cap_last_cap(); l++)
1837                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1838                                 char *t;
1839
1840                                 if ((t = cap_to_name(l))) {
1841                                         fprintf(f, " %s", t);
1842                                         cap_free(t);
1843                                 }
1844                         }
1845
1846                 fputs("\n", f);
1847         }
1848
1849         if (c->user)
1850                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1851         if (c->group)
1852                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1853
1854         if (strv_length(c->supplementary_groups) > 0) {
1855                 fprintf(f, "%sSupplementaryGroups:", prefix);
1856                 strv_fprintf(f, c->supplementary_groups);
1857                 fputs("\n", f);
1858         }
1859
1860         if (c->pam_name)
1861                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1862
1863         if (strv_length(c->read_write_dirs) > 0) {
1864                 fprintf(f, "%sReadWriteDirs:", prefix);
1865                 strv_fprintf(f, c->read_write_dirs);
1866                 fputs("\n", f);
1867         }
1868
1869         if (strv_length(c->read_only_dirs) > 0) {
1870                 fprintf(f, "%sReadOnlyDirs:", prefix);
1871                 strv_fprintf(f, c->read_only_dirs);
1872                 fputs("\n", f);
1873         }
1874
1875         if (strv_length(c->inaccessible_dirs) > 0) {
1876                 fprintf(f, "%sInaccessibleDirs:", prefix);
1877                 strv_fprintf(f, c->inaccessible_dirs);
1878                 fputs("\n", f);
1879         }
1880
1881         fprintf(f,
1882                 "%sKillMode: %s\n"
1883                 "%sKillSignal: SIG%s\n"
1884                 "%sSendSIGKILL: %s\n"
1885                 "%sIgnoreSIGPIPE: %s\n",
1886                 prefix, kill_mode_to_string(c->kill_mode),
1887                 prefix, signal_to_string(c->kill_signal),
1888                 prefix, yes_no(c->send_sigkill),
1889                 prefix, yes_no(c->ignore_sigpipe));
1890
1891         if (c->utmp_id)
1892                 fprintf(f,
1893                         "%sUtmpIdentifier: %s\n",
1894                         prefix, c->utmp_id);
1895 }
1896
1897 void exec_status_start(ExecStatus *s, pid_t pid) {
1898         assert(s);
1899
1900         zero(*s);
1901         s->pid = pid;
1902         dual_timestamp_get(&s->start_timestamp);
1903 }
1904
1905 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1906         assert(s);
1907
1908         if (s->pid && s->pid != pid)
1909                 zero(*s);
1910
1911         s->pid = pid;
1912         dual_timestamp_get(&s->exit_timestamp);
1913
1914         s->code = code;
1915         s->status = status;
1916
1917         if (context) {
1918                 if (context->utmp_id)
1919                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1920
1921                 exec_context_tty_reset(context);
1922         }
1923 }
1924
1925 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1926         char buf[FORMAT_TIMESTAMP_MAX];
1927
1928         assert(s);
1929         assert(f);
1930
1931         if (!prefix)
1932                 prefix = "";
1933
1934         if (s->pid <= 0)
1935                 return;
1936
1937         fprintf(f,
1938                 "%sPID: %lu\n",
1939                 prefix, (unsigned long) s->pid);
1940
1941         if (s->start_timestamp.realtime > 0)
1942                 fprintf(f,
1943                         "%sStart Timestamp: %s\n",
1944                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1945
1946         if (s->exit_timestamp.realtime > 0)
1947                 fprintf(f,
1948                         "%sExit Timestamp: %s\n"
1949                         "%sExit Code: %s\n"
1950                         "%sExit Status: %i\n",
1951                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1952                         prefix, sigchld_code_to_string(s->code),
1953                         prefix, s->status);
1954 }
1955
1956 char *exec_command_line(char **argv) {
1957         size_t k;
1958         char *n, *p, **a;
1959         bool first = true;
1960
1961         assert(argv);
1962
1963         k = 1;
1964         STRV_FOREACH(a, argv)
1965                 k += strlen(*a)+3;
1966
1967         if (!(n = new(char, k)))
1968                 return NULL;
1969
1970         p = n;
1971         STRV_FOREACH(a, argv) {
1972
1973                 if (!first)
1974                         *(p++) = ' ';
1975                 else
1976                         first = false;
1977
1978                 if (strpbrk(*a, WHITESPACE)) {
1979                         *(p++) = '\'';
1980                         p = stpcpy(p, *a);
1981                         *(p++) = '\'';
1982                 } else
1983                         p = stpcpy(p, *a);
1984
1985         }
1986
1987         *p = 0;
1988
1989         /* FIXME: this doesn't really handle arguments that have
1990          * spaces and ticks in them */
1991
1992         return n;
1993 }
1994
1995 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1996         char *p2;
1997         const char *prefix2;
1998
1999         char *cmd;
2000
2001         assert(c);
2002         assert(f);
2003
2004         if (!prefix)
2005                 prefix = "";
2006         p2 = strappend(prefix, "\t");
2007         prefix2 = p2 ? p2 : prefix;
2008
2009         cmd = exec_command_line(c->argv);
2010
2011         fprintf(f,
2012                 "%sCommand Line: %s\n",
2013                 prefix, cmd ? cmd : strerror(ENOMEM));
2014
2015         free(cmd);
2016
2017         exec_status_dump(&c->exec_status, f, prefix2);
2018
2019         free(p2);
2020 }
2021
2022 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2023         assert(f);
2024
2025         if (!prefix)
2026                 prefix = "";
2027
2028         LIST_FOREACH(command, c, c)
2029                 exec_command_dump(c, f, prefix);
2030 }
2031
2032 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2033         ExecCommand *end;
2034
2035         assert(l);
2036         assert(e);
2037
2038         if (*l) {
2039                 /* It's kind of important, that we keep the order here */
2040                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2041                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2042         } else
2043               *l = e;
2044 }
2045
2046 int exec_command_set(ExecCommand *c, const char *path, ...) {
2047         va_list ap;
2048         char **l, *p;
2049
2050         assert(c);
2051         assert(path);
2052
2053         va_start(ap, path);
2054         l = strv_new_ap(path, ap);
2055         va_end(ap);
2056
2057         if (!l)
2058                 return -ENOMEM;
2059
2060         if (!(p = strdup(path))) {
2061                 strv_free(l);
2062                 return -ENOMEM;
2063         }
2064
2065         free(c->path);
2066         c->path = p;
2067
2068         strv_free(c->argv);
2069         c->argv = l;
2070
2071         return 0;
2072 }
2073
2074 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2075         [EXEC_INPUT_NULL] = "null",
2076         [EXEC_INPUT_TTY] = "tty",
2077         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2078         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2079         [EXEC_INPUT_SOCKET] = "socket"
2080 };
2081
2082 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2083
2084 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2085         [EXEC_OUTPUT_INHERIT] = "inherit",
2086         [EXEC_OUTPUT_NULL] = "null",
2087         [EXEC_OUTPUT_TTY] = "tty",
2088         [EXEC_OUTPUT_SYSLOG] = "syslog",
2089         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2090         [EXEC_OUTPUT_KMSG] = "kmsg",
2091         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2092         [EXEC_OUTPUT_JOURNAL] = "journal",
2093         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2094         [EXEC_OUTPUT_SOCKET] = "socket"
2095 };
2096
2097 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2098
2099 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2100         [KILL_CONTROL_GROUP] = "control-group",
2101         [KILL_PROCESS] = "process",
2102         [KILL_NONE] = "none"
2103 };
2104
2105 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2106
2107 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2108         [KILL_MAIN] = "main",
2109         [KILL_CONTROL] = "control",
2110         [KILL_ALL] = "all"
2111 };
2112
2113 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);