chiark / gitweb /
execute: unify setup_{output,error}
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
/* Five seconds, expressed in microseconds.
 * NOTE(review): presumably bounds the wait on the idle pipe handed to
 * exec_spawn() -- the consumer is not in view here, confirm. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)

/* Permission bits chown_terminal() applies to a terminal: read/write for
 * the owner, write-only for the group. This assumes there is a 'tty' group */
#define TTY_MODE 0620
74
/* Moves the n_fds descriptors in fds[] so that fds[k] ends up being
 * descriptor number k+3, i.e. the array occupies the contiguous range
 * starting right after stdin/stdout/stderr.
 *
 * The array is modified in place. Returns 0 on success or a negative
 * errno value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, got;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        /* F_DUPFD hands out the lowest free descriptor >= want */
                        got = fcntl(fds[idx], F_DUPFD, want);
                        if (got < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = got;

                        /* The wanted slot was still occupied (by a later
                         * entry of the array). Remember the first such
                         * index so that another pass starts there. */
                        if (got != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
118
/* Prepares the n_fds descriptors in fds[] for being handed to a child:
 * O_NONBLOCK is set or cleared according to 'nonblock', and FD_CLOEXEC
 * is always cleared, since the descriptors are meant to be passed on.
 *
 * Returns 0 on success, or the first negative error reported by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
/* Opens /dev/null with the given open(2) flags (plus O_NOCTTY) and makes
 * it available as descriptor number nfd.
 *
 * Returns nfd on success, a negative errno value on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* open() may already have handed us the wanted number */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
184
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
186         int fd, r;
187         union sockaddr_union sa;
188
189         assert(context);
190         assert(output < _EXEC_OUTPUT_MAX);
191         assert(ident);
192         assert(nfd >= 0);
193
194         fd = socket(AF_UNIX, SOCK_STREAM, 0);
195         if (fd < 0)
196                 return -errno;
197
198         zero(sa);
199         sa.un.sun_family = AF_UNIX;
200         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
201
202         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
203         if (r < 0) {
204                 close_nointr_nofail(fd);
205                 return -errno;
206         }
207
208         if (shutdown(fd, SHUT_RD) < 0) {
209                 close_nointr_nofail(fd);
210                 return -errno;
211         }
212
213         dprintf(fd,
214                 "%s\n"
215                 "%s\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n"
219                 "%i\n"
220                 "%i\n",
221                 context->syslog_identifier ? context->syslog_identifier : ident,
222                 unit_id,
223                 context->syslog_priority,
224                 !!context->syslog_level_prefix,
225                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
228
229         if (fd != nfd) {
230                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231                 close_nointr_nofail(fd);
232         } else
233                 r = nfd;
234
235         return r;
236 }
/* Opens the terminal at 'path' via open_terminal() with 'mode' plus
 * O_NOCTTY and makes it available as descriptor number nfd.
 *
 * Returns nfd on success; otherwise the negative value reported by
 * open_terminal(), or a negative errno value if dup2() fails. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
254
255 static bool is_terminal_input(ExecInput i) {
256         return
257                 i == EXEC_INPUT_TTY ||
258                 i == EXEC_INPUT_TTY_FORCE ||
259                 i == EXEC_INPUT_TTY_FAIL;
260 }
261
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
263
264         if (is_terminal_input(std_input) && !apply_tty_stdin)
265                 return EXEC_INPUT_NULL;
266
267         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268                 return EXEC_INPUT_NULL;
269
270         return std_input;
271 }
272
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
274
275         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276                 return EXEC_OUTPUT_INHERIT;
277
278         return std_output;
279 }
280
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
282         ExecInput i;
283
284         assert(context);
285
286         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
287
288         switch (i) {
289
290         case EXEC_INPUT_NULL:
291                 return open_null_as(O_RDONLY, STDIN_FILENO);
292
293         case EXEC_INPUT_TTY:
294         case EXEC_INPUT_TTY_FORCE:
295         case EXEC_INPUT_TTY_FAIL: {
296                 int fd, r;
297
298                 if ((fd = acquire_terminal(
299                                      tty_path(context),
300                                      i == EXEC_INPUT_TTY_FAIL,
301                                      i == EXEC_INPUT_TTY_FORCE,
302                                      false,
303                                      (usec_t) -1)) < 0)
304                         return fd;
305
306                 if (fd != STDIN_FILENO) {
307                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308                         close_nointr_nofail(fd);
309                 } else
310                         r = STDIN_FILENO;
311
312                 return r;
313         }
314
315         case EXEC_INPUT_SOCKET:
316                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
317
318         default:
319                 assert_not_reached("Unknown input type");
320         }
321 }
322
323 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
324         ExecOutput o;
325         ExecInput i;
326         int r;
327
328         assert(context);
329         assert(ident);
330
331         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332         o = fixup_output(context->std_output, socket_fd);
333
334         if (fileno == STDERR_FILENO) {
335                 ExecOutput e;
336                 e = fixup_output(context->std_error, socket_fd);
337
338                 /* This expects the input and output are already set up */
339
340                 /* Don't change the stderr file descriptor if we inherit all
341                  * the way and are not on a tty */
342                 if (e == EXEC_OUTPUT_INHERIT &&
343                     o == EXEC_OUTPUT_INHERIT &&
344                     i == EXEC_INPUT_NULL &&
345                     !is_terminal_input(context->std_input) &&
346                     getppid () != 1)
347                         return fileno;
348
349                 /* Duplicate from stdout if possible */
350                 if (e == o || e == EXEC_OUTPUT_INHERIT)
351                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
352
353                 o = e;
354
355         } else if (o == EXEC_OUTPUT_INHERIT) {
356                 /* If input got downgraded, inherit the original value */
357                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
358                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
359
360                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
361                 if (i != EXEC_INPUT_NULL)
362                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
363
364                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
365                 if (getppid() != 1)
366                         return fileno;
367
368                 /* We need to open /dev/null here anew, to get the right access mode. */
369                 return open_null_as(O_WRONLY, fileno);
370         }
371
372         switch (o) {
373
374         case EXEC_OUTPUT_NULL:
375                 return open_null_as(O_WRONLY, fileno);
376
377         case EXEC_OUTPUT_TTY:
378                 if (is_terminal_input(i))
379                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
380
381                 /* We don't reset the terminal if this is just about output */
382                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
383
384         case EXEC_OUTPUT_SYSLOG:
385         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
386         case EXEC_OUTPUT_KMSG:
387         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
388         case EXEC_OUTPUT_JOURNAL:
389         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
390                 r = connect_logger_as(context, o, ident, unit_id, fileno);
391                 if (r < 0) {
392                         log_error("Failed to connect std%s of %s to the journal socket: %s",
393                                 fileno == STDOUT_FILENO ? "out" : "err",
394                                 unit_id, strerror(-r));
395                         r = open_null_as(O_WRONLY, fileno);
396                 }
397                 return r;
398
399         case EXEC_OUTPUT_SOCKET:
400                 assert(socket_fd >= 0);
401                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
402
403         default:
404                 assert_not_reached("Unknown error type");
405         }
406 }
407
408 static int chown_terminal(int fd, uid_t uid) {
409         struct stat st;
410
411         assert(fd >= 0);
412
413         /* This might fail. What matters are the results. */
414         (void) fchown(fd, uid, -1);
415         (void) fchmod(fd, TTY_MODE);
416
417         if (fstat(fd, &st) < 0)
418                 return -errno;
419
420         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
421                 return -EPERM;
422
423         return 0;
424 }
425
426 static int setup_confirm_stdio(int *_saved_stdin,
427                                int *_saved_stdout) {
428         int fd = -1, saved_stdin, saved_stdout = -1, r;
429
430         assert(_saved_stdin);
431         assert(_saved_stdout);
432
433         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
434         if (saved_stdin < 0)
435                 return -errno;
436
437         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
438         if (saved_stdout < 0) {
439                 r = errno;
440                 goto fail;
441         }
442
443         fd = acquire_terminal(
444                         "/dev/console",
445                         false,
446                         false,
447                         false,
448                         DEFAULT_CONFIRM_USEC);
449         if (fd < 0) {
450                 r = fd;
451                 goto fail;
452         }
453
454         r = chown_terminal(fd, getuid());
455         if (r < 0)
456                 goto fail;
457
458         if (dup2(fd, STDIN_FILENO) < 0) {
459                 r = -errno;
460                 goto fail;
461         }
462
463         if (dup2(fd, STDOUT_FILENO) < 0) {
464                 r = -errno;
465                 goto fail;
466         }
467
468         if (fd >= 2)
469                 close_nointr_nofail(fd);
470
471         *_saved_stdin = saved_stdin;
472         *_saved_stdout = saved_stdout;
473
474         return 0;
475
476 fail:
477         if (saved_stdout >= 0)
478                 close_nointr_nofail(saved_stdout);
479
480         if (saved_stdin >= 0)
481                 close_nointr_nofail(saved_stdin);
482
483         if (fd >= 0)
484                 close_nointr_nofail(fd);
485
486         return r;
487 }
488
489 static int write_confirm_message(const char *format, ...) {
490         int fd;
491         va_list ap;
492
493         assert(format);
494
495         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
496         if (fd < 0)
497                 return fd;
498
499         va_start(ap, format);
500         vdprintf(fd, format, ap);
501         va_end(ap);
502
503         close_nointr_nofail(fd);
504
505         return 0;
506 }
507
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin/stdout and closes the saved
 * copies. Descriptors that are negative are skipped.
 *
 * Returns 0 on success; if restoring fails, a negative errno value of
 * the last dup2() that failed. The saved copies are closed either way. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return ret;
}
534
/* Asks on the console whether the command line in argv shall be
 * executed. stdin/stdout are pointed at the console for the duration of
 * the question and restored afterwards.
 *
 * On success the answer is stored in *response by ask(), restricted to
 * the characters "yns". Returns the result of ask(), or a negative
 * error value if the console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Put stdin/stdout back and release the saved
                 * descriptors, otherwise they would leak and we would
                 * stay attached to the console */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
554
555 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
556         bool keep_groups = false;
557         int r;
558
559         assert(context);
560
561         /* Lookup and set GID and supplementary group list. Here too
562          * we avoid NSS lookups for gid=0. */
563
564         if (context->group || username) {
565
566                 if (context->group) {
567                         const char *g = context->group;
568
569                         if ((r = get_group_creds(&g, &gid)) < 0)
570                                 return r;
571                 }
572
573                 /* First step, initialize groups from /etc/groups */
574                 if (username && gid != 0) {
575                         if (initgroups(username, gid) < 0)
576                                 return -errno;
577
578                         keep_groups = true;
579                 }
580
581                 /* Second step, set our gids */
582                 if (setresgid(gid, gid, gid) < 0)
583                         return -errno;
584         }
585
586         if (context->supplementary_groups) {
587                 int ngroups_max, k;
588                 gid_t *gids;
589                 char **i;
590
591                 /* Final step, initialize any manually set supplementary groups */
592                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
593
594                 if (!(gids = new(gid_t, ngroups_max)))
595                         return -ENOMEM;
596
597                 if (keep_groups) {
598                         if ((k = getgroups(ngroups_max, gids)) < 0) {
599                                 free(gids);
600                                 return -errno;
601                         }
602                 } else
603                         k = 0;
604
605                 STRV_FOREACH(i, context->supplementary_groups) {
606                         const char *g;
607
608                         if (k >= ngroups_max) {
609                                 free(gids);
610                                 return -E2BIG;
611                         }
612
613                         g = *i;
614                         r = get_group_creds(&g, gids+k);
615                         if (r < 0) {
616                                 free(gids);
617                                 return r;
618                         }
619
620                         k++;
621                 }
622
623                 if (setgroups(k, gids) < 0) {
624                         free(gids);
625                         return -errno;
626                 }
627
628                 free(gids);
629         }
630
631         return 0;
632 }
633
634 static int enforce_user(const ExecContext *context, uid_t uid) {
635         int r;
636         assert(context);
637
638         /* Sets (but doesn't lookup) the uid and make sure we keep the
639          * capabilities while doing so. */
640
641         if (context->capabilities) {
642                 cap_t d;
643                 static const cap_value_t bits[] = {
644                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
645                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
646                 };
647
648                 /* First step: If we need to keep capabilities but
649                  * drop privileges we need to make sure we keep our
650                  * caps, whiel we drop privileges. */
651                 if (uid != 0) {
652                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
653
654                         if (prctl(PR_GET_SECUREBITS) != sb)
655                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
656                                         return -errno;
657                 }
658
659                 /* Second step: set the capabilities. This will reduce
660                  * the capabilities to the minimum we need. */
661
662                 if (!(d = cap_dup(context->capabilities)))
663                         return -errno;
664
665                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
666                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
667                         r = -errno;
668                         cap_free(d);
669                         return r;
670                 }
671
672                 if (cap_set_proc(d) < 0) {
673                         r = -errno;
674                         cap_free(d);
675                         return r;
676                 }
677
678                 cap_free(d);
679         }
680
681         /* Third step: actually set the uids */
682         if (setresuid(uid, uid, uid) < 0)
683                 return -errno;
684
685         /* At this point we should have all necessary capabilities but
686            are otherwise a normal user. However, the caps might got
687            corrupted due to the setresuid() so we need clean them up
688            later. This is done outside of this call. */
689
690         return 0;
691 }
692
693 #ifdef HAVE_PAM
694
695 static int null_conv(
696                 int num_msg,
697                 const struct pam_message **msg,
698                 struct pam_response **resp,
699                 void *appdata_ptr) {
700
701         /* We don't support conversations */
702
703         return PAM_CONV_ERR;
704 }
705
706 static int setup_pam(
707                 const char *name,
708                 const char *user,
709                 uid_t uid,
710                 const char *tty,
711                 char ***pam_env,
712                 int fds[], unsigned n_fds) {
713
714         static const struct pam_conv conv = {
715                 .conv = null_conv,
716                 .appdata_ptr = NULL
717         };
718
719         pam_handle_t *handle = NULL;
720         sigset_t ss, old_ss;
721         int pam_code = PAM_SUCCESS;
722         int err;
723         char **e = NULL;
724         bool close_session = false;
725         pid_t pam_pid = 0, parent_pid;
726
727         assert(name);
728         assert(user);
729         assert(pam_env);
730
731         /* We set up PAM in the parent process, then fork. The child
732          * will then stay around until killed via PR_GET_PDEATHSIG or
733          * systemd via the cgroup logic. It will then remove the PAM
734          * session again. The parent process will exec() the actual
735          * daemon. We do things this way to ensure that the main PID
736          * of the daemon is the one we initially fork()ed. */
737
738         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
739                 handle = NULL;
740                 goto fail;
741         }
742
743         if (tty)
744                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
745                         goto fail;
746
747         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
748                 goto fail;
749
750         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
751                 goto fail;
752
753         close_session = true;
754
755         if ((!(e = pam_getenvlist(handle)))) {
756                 pam_code = PAM_BUF_ERR;
757                 goto fail;
758         }
759
760         /* Block SIGTERM, so that we know that it won't get lost in
761          * the child */
762         if (sigemptyset(&ss) < 0 ||
763             sigaddset(&ss, SIGTERM) < 0 ||
764             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
765                 goto fail;
766
767         parent_pid = getpid();
768
769         if ((pam_pid = fork()) < 0)
770                 goto fail;
771
772         if (pam_pid == 0) {
773                 int sig;
774                 int r = EXIT_PAM;
775
776                 /* The child's job is to reset the PAM session on
777                  * termination */
778
779                 /* This string must fit in 10 chars (i.e. the length
780                  * of "/sbin/init"), to look pretty in /bin/ps */
781                 rename_process("(sd-pam)");
782
783                 /* Make sure we don't keep open the passed fds in this
784                 child. We assume that otherwise only those fds are
785                 open here that have been opened by PAM. */
786                 close_many(fds, n_fds);
787
788                 /* Drop privileges - we don't need any to pam_close_session
789                  * and this will make PR_SET_PDEATHSIG work in most cases.
790                  * If this fails, ignore the error - but expect sd-pam threads
791                  * to fail to exit normally */
792                 if (setresuid(uid, uid, uid) < 0)
793                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
794
795                 /* Wait until our parent died. This will only work if
796                  * the above setresuid() succeeds, otherwise the kernel
797                  * will not allow unprivileged parents kill their privileged
798                  * children this way. We rely on the control groups kill logic
799                  * to do the rest for us. */
800                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
801                         goto child_finish;
802
803                 /* Check if our parent process might already have
804                  * died? */
805                 if (getppid() == parent_pid) {
806                         for (;;) {
807                                 if (sigwait(&ss, &sig) < 0) {
808                                         if (errno == EINTR)
809                                                 continue;
810
811                                         goto child_finish;
812                                 }
813
814                                 assert(sig == SIGTERM);
815                                 break;
816                         }
817                 }
818
819                 /* If our parent died we'll end the session */
820                 if (getppid() != parent_pid)
821                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
822                                 goto child_finish;
823
824                 r = 0;
825
826         child_finish:
827                 pam_end(handle, pam_code | PAM_DATA_SILENT);
828                 _exit(r);
829         }
830
831         /* If the child was forked off successfully it will do all the
832          * cleanups, so forget about the handle here. */
833         handle = NULL;
834
835         /* Unblock SIGTERM again in the parent */
836         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
837                 goto fail;
838
839         /* We close the log explicitly here, since the PAM modules
840          * might have opened it, but we don't want this fd around. */
841         closelog();
842
843         *pam_env = e;
844         e = NULL;
845
846         return 0;
847
848 fail:
849         if (pam_code != PAM_SUCCESS)
850                 err = -EPERM;  /* PAM errors do not map to errno */
851         else
852                 err = -errno;
853
854         if (handle) {
855                 if (close_session)
856                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
857
858                 pam_end(handle, pam_code | PAM_DATA_SILENT);
859         }
860
861         strv_free(e);
862
863         closelog();
864
865         if (pam_pid > 1) {
866                 kill(pam_pid, SIGTERM);
867                 kill(pam_pid, SIGCONT);
868         }
869
870         return err;
871 }
872 #endif
873
/* Renames the current process to the file name component of 'path',
 * wrapped in parentheses. If the file name is longer than 8 characters
 * only its last 8 are used; if it is empty the name "(...)" is used. */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL. The result must fit in
         * 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */
        char buf[11];
        const char *base;
        size_t n;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        n = strlen(base);
        if (n > 8) {
                /* Keep the tail, it is usually the more telling part
                 * since the beginning might just be "systemd-" */
                base += n - 8;
                n = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, base, n);
        buf[n + 1] = ')';
        buf[n + 2] = 0;

        rename_process(buf);
}
904
905 static int apply_seccomp(uint32_t *syscall_filter) {
906         static const struct sock_filter header[] = {
907                 VALIDATE_ARCHITECTURE,
908                 EXAMINE_SYSCALL
909         };
910         static const struct sock_filter footer[] = {
911                 _KILL_PROCESS
912         };
913
914         int i;
915         unsigned n;
916         struct sock_filter *f;
917         struct sock_fprog prog;
918
919         assert(syscall_filter);
920
921         /* First: count the syscalls to check for */
922         for (i = 0, n = 0; i < syscall_max(); i++)
923                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
924                         n++;
925
926         /* Second: build the filter program from a header the syscall
927          * matches and the footer */
928         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
929         memcpy(f, header, sizeof(header));
930
931         for (i = 0, n = 0; i < syscall_max(); i++)
932                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
933                         struct sock_filter item[] = {
934                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
935                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
936                         };
937
938                         assert_cc(ELEMENTSOF(item) == 2);
939
940                         f[ELEMENTSOF(header) + 2*n]  = item[0];
941                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
942
943                         n++;
944                 }
945
946         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
947
948         /* Third: install the filter */
949         zero(prog);
950         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
951         prog.filter = f;
952         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
953                 return -errno;
954
955         return 0;
956 }
957
958 int exec_spawn(ExecCommand *command,
959                char **argv,
960                const ExecContext *context,
961                int fds[], unsigned n_fds,
962                char **environment,
963                bool apply_permissions,
964                bool apply_chroot,
965                bool apply_tty_stdin,
966                bool confirm_spawn,
967                CGroupBonding *cgroup_bondings,
968                CGroupAttribute *cgroup_attributes,
969                const char *cgroup_suffix,
970                const char *unit_id,
971                int idle_pipe[2],
972                pid_t *ret) {
973
974         pid_t pid;
975         int r;
976         char *line;
977         int socket_fd;
978         char _cleanup_strv_free_ **files_env = NULL;
979
980         assert(command);
981         assert(context);
982         assert(ret);
983         assert(fds || n_fds <= 0);
984
985         if (context->std_input == EXEC_INPUT_SOCKET ||
986             context->std_output == EXEC_OUTPUT_SOCKET ||
987             context->std_error == EXEC_OUTPUT_SOCKET) {
988
989                 if (n_fds != 1)
990                         return -EINVAL;
991
992                 socket_fd = fds[0];
993
994                 fds = NULL;
995                 n_fds = 0;
996         } else
997                 socket_fd = -1;
998
999         r = exec_context_load_environment(context, &files_env);
1000         if (r < 0) {
1001                 log_struct_unit(LOG_ERR,
1002                            unit_id,
1003                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1004                            "ERRNO=%d", -r,
1005                            NULL);
1006                 return r;
1007         }
1008
1009         if (!argv)
1010                 argv = command->argv;
1011
1012         line = exec_command_line(argv);
1013         if (!line)
1014                 return log_oom();
1015
1016         log_struct_unit(LOG_DEBUG,
1017                    unit_id,
1018                    "MESSAGE=About to execute %s", line,
1019                    NULL);
1020         free(line);
1021
1022         r = cgroup_bonding_realize_list(cgroup_bondings);
1023         if (r < 0)
1024                 return r;
1025
1026         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1027
1028         pid = fork();
1029         if (pid < 0)
1030                 return -errno;
1031
1032         if (pid == 0) {
1033                 int i, err;
1034                 sigset_t ss;
1035                 const char *username = NULL, *home = NULL;
1036                 uid_t uid = (uid_t) -1;
1037                 gid_t gid = (gid_t) -1;
1038                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1039                         **final_env = NULL, **final_argv = NULL;
1040                 unsigned n_env = 0;
1041                 bool set_access = false;
1042
1043                 /* child */
1044
1045                 rename_process_from_path(command->path);
1046
1047                 /* We reset exactly these signals, since they are the
1048                  * only ones we set to SIG_IGN in the main daemon. All
1049                  * others we leave untouched because we set them to
1050                  * SIG_DFL or a valid handler initially, both of which
1051                  * will be demoted to SIG_DFL. */
1052                 default_signals(SIGNALS_CRASH_HANDLER,
1053                                 SIGNALS_IGNORE, -1);
1054
1055                 if (context->ignore_sigpipe)
1056                         ignore_signals(SIGPIPE, -1);
1057
1058                 assert_se(sigemptyset(&ss) == 0);
1059                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1060                         err = -errno;
1061                         r = EXIT_SIGNAL_MASK;
1062                         goto fail_child;
1063                 }
1064
1065                 if (idle_pipe) {
1066                         if (idle_pipe[1] >= 0)
1067                                 close_nointr_nofail(idle_pipe[1]);
1068                         if (idle_pipe[0] >= 0) {
1069                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1070                                 close_nointr_nofail(idle_pipe[0]);
1071                         }
1072                 }
1073
1074                 /* Close sockets very early to make sure we don't
1075                  * block init reexecution because it cannot bind its
1076                  * sockets */
1077                 log_forget_fds();
1078                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1079                                            socket_fd >= 0 ? 1 : n_fds);
1080                 if (err < 0) {
1081                         r = EXIT_FDS;
1082                         goto fail_child;
1083                 }
1084
1085                 if (!context->same_pgrp)
1086                         if (setsid() < 0) {
1087                                 err = -errno;
1088                                 r = EXIT_SETSID;
1089                                 goto fail_child;
1090                         }
1091
1092                 if (context->tcpwrap_name) {
1093                         if (socket_fd >= 0)
1094                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1095                                         err = -EACCES;
1096                                         r = EXIT_TCPWRAP;
1097                                         goto fail_child;
1098                                 }
1099
1100                         for (i = 0; i < (int) n_fds; i++) {
1101                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1102                                         err = -EACCES;
1103                                         r = EXIT_TCPWRAP;
1104                                         goto fail_child;
1105                                 }
1106                         }
1107                 }
1108
1109                 exec_context_tty_reset(context);
1110
1111                 if (confirm_spawn) {
1112                         char response;
1113
1114                         err = ask_for_confirmation(&response, argv);
1115                         if (err == -ETIMEDOUT)
1116                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1117                         else if (err < 0)
1118                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1119                         else if (response == 's') {
1120                                 write_confirm_message("Skipping execution.\n");
1121                                 err = -ECANCELED;
1122                                 r = EXIT_CONFIRM;
1123                                 goto fail_child;
1124                         } else if (response == 'n') {
1125                                 write_confirm_message("Failing execution.\n");
1126                                 err = r = 0;
1127                                 goto fail_child;
1128                         }
1129                 }
1130
1131                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1132                  * must sure to drop O_NONBLOCK */
1133                 if (socket_fd >= 0)
1134                         fd_nonblock(socket_fd, false);
1135
1136                 err = setup_input(context, socket_fd, apply_tty_stdin);
1137                 if (err < 0) {
1138                         r = EXIT_STDIN;
1139                         goto fail_child;
1140                 }
1141
1142                 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1143                 if (err < 0) {
1144                         r = EXIT_STDOUT;
1145                         goto fail_child;
1146                 }
1147
1148                 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1149                 if (err < 0) {
1150                         r = EXIT_STDERR;
1151                         goto fail_child;
1152                 }
1153
1154                 if (cgroup_bondings) {
1155                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1156                         if (err < 0) {
1157                                 r = EXIT_CGROUP;
1158                                 goto fail_child;
1159                         }
1160                 }
1161
1162                 if (context->oom_score_adjust_set) {
1163                         char t[16];
1164
1165                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1166                         char_array_0(t);
1167
1168                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1169                                 err = -errno;
1170                                 r = EXIT_OOM_ADJUST;
1171                                 goto fail_child;
1172                         }
1173                 }
1174
1175                 if (context->nice_set)
1176                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1177                                 err = -errno;
1178                                 r = EXIT_NICE;
1179                                 goto fail_child;
1180                         }
1181
1182                 if (context->cpu_sched_set) {
1183                         struct sched_param param;
1184
1185                         zero(param);
1186                         param.sched_priority = context->cpu_sched_priority;
1187
1188                         if (sched_setscheduler(0, context->cpu_sched_policy |
1189                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1190                                 err = -errno;
1191                                 r = EXIT_SETSCHEDULER;
1192                                 goto fail_child;
1193                         }
1194                 }
1195
1196                 if (context->cpuset)
1197                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1198                                 err = -errno;
1199                                 r = EXIT_CPUAFFINITY;
1200                                 goto fail_child;
1201                         }
1202
1203                 if (context->ioprio_set)
1204                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1205                                 err = -errno;
1206                                 r = EXIT_IOPRIO;
1207                                 goto fail_child;
1208                         }
1209
1210                 if (context->timer_slack_nsec != (nsec_t) -1)
1211                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1212                                 err = -errno;
1213                                 r = EXIT_TIMERSLACK;
1214                                 goto fail_child;
1215                         }
1216
1217                 if (context->utmp_id)
1218                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1219
1220                 if (context->user) {
1221                         username = context->user;
1222                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1223                         if (err < 0) {
1224                                 r = EXIT_USER;
1225                                 goto fail_child;
1226                         }
1227
1228                         if (is_terminal_input(context->std_input)) {
1229                                 err = chown_terminal(STDIN_FILENO, uid);
1230                                 if (err < 0) {
1231                                         r = EXIT_STDIN;
1232                                         goto fail_child;
1233                                 }
1234                         }
1235
1236                         if (cgroup_bondings && context->control_group_modify) {
1237                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1238                                 if (err >= 0)
1239                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1240                                 if (err < 0) {
1241                                         r = EXIT_CGROUP;
1242                                         goto fail_child;
1243                                 }
1244
1245                                 set_access = true;
1246                         }
1247                 }
1248
1249                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1250                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1251                         if (err < 0) {
1252                                 r = EXIT_CGROUP;
1253                                 goto fail_child;
1254                         }
1255                 }
1256
1257                 if (apply_permissions) {
1258                         err = enforce_groups(context, username, gid);
1259                         if (err < 0) {
1260                                 r = EXIT_GROUP;
1261                                 goto fail_child;
1262                         }
1263                 }
1264
1265                 umask(context->umask);
1266
1267 #ifdef HAVE_PAM
1268                 if (apply_permissions && context->pam_name && username) {
1269                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1270                         if (err < 0) {
1271                                 r = EXIT_PAM;
1272                                 goto fail_child;
1273                         }
1274                 }
1275 #endif
1276                 if (context->private_network) {
1277                         if (unshare(CLONE_NEWNET) < 0) {
1278                                 err = -errno;
1279                                 r = EXIT_NETWORK;
1280                                 goto fail_child;
1281                         }
1282
1283                         loopback_setup();
1284                 }
1285
1286                 if (strv_length(context->read_write_dirs) > 0 ||
1287                     strv_length(context->read_only_dirs) > 0 ||
1288                     strv_length(context->inaccessible_dirs) > 0 ||
1289                     context->mount_flags != 0 ||
1290                     context->private_tmp) {
1291                         err = setup_namespace(context->read_write_dirs,
1292                                               context->read_only_dirs,
1293                                               context->inaccessible_dirs,
1294                                               context->private_tmp,
1295                                               context->mount_flags);
1296                         if (err < 0) {
1297                                 r = EXIT_NAMESPACE;
1298                                 goto fail_child;
1299                         }
1300                 }
1301
1302                 if (apply_chroot) {
1303                         if (context->root_directory)
1304                                 if (chroot(context->root_directory) < 0) {
1305                                         err = -errno;
1306                                         r = EXIT_CHROOT;
1307                                         goto fail_child;
1308                                 }
1309
1310                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1311                                 err = -errno;
1312                                 r = EXIT_CHDIR;
1313                                 goto fail_child;
1314                         }
1315                 } else {
1316                         char _cleanup_free_ *d = NULL;
1317
1318                         if (asprintf(&d, "%s/%s",
1319                                      context->root_directory ? context->root_directory : "",
1320                                      context->working_directory ? context->working_directory : "") < 0) {
1321                                 err = -ENOMEM;
1322                                 r = EXIT_MEMORY;
1323                                 goto fail_child;
1324                         }
1325
1326                         if (chdir(d) < 0) {
1327                                 err = -errno;
1328                                 r = EXIT_CHDIR;
1329                                 goto fail_child;
1330                         }
1331                 }
1332
1333                 /* We repeat the fd closing here, to make sure that
1334                  * nothing is leaked from the PAM modules */
1335                 err = close_all_fds(fds, n_fds);
1336                 if (err >= 0)
1337                         err = shift_fds(fds, n_fds);
1338                 if (err >= 0)
1339                         err = flags_fds(fds, n_fds, context->non_blocking);
1340                 if (err < 0) {
1341                         r = EXIT_FDS;
1342                         goto fail_child;
1343                 }
1344
1345                 if (apply_permissions) {
1346
1347                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1348                                 if (!context->rlimit[i])
1349                                         continue;
1350
1351                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1352                                         err = -errno;
1353                                         r = EXIT_LIMITS;
1354                                         goto fail_child;
1355                                 }
1356                         }
1357
1358                         if (context->capability_bounding_set_drop) {
1359                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1360                                 if (err < 0) {
1361                                         r = EXIT_CAPABILITIES;
1362                                         goto fail_child;
1363                                 }
1364                         }
1365
1366                         if (context->user) {
1367                                 err = enforce_user(context, uid);
1368                                 if (err < 0) {
1369                                         r = EXIT_USER;
1370                                         goto fail_child;
1371                                 }
1372                         }
1373
1374                         /* PR_GET_SECUREBITS is not privileged, while
1375                          * PR_SET_SECUREBITS is. So to suppress
1376                          * potential EPERMs we'll try not to call
1377                          * PR_SET_SECUREBITS unless necessary. */
1378                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1379                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1380                                         err = -errno;
1381                                         r = EXIT_SECUREBITS;
1382                                         goto fail_child;
1383                                 }
1384
1385                         if (context->capabilities)
1386                                 if (cap_set_proc(context->capabilities) < 0) {
1387                                         err = -errno;
1388                                         r = EXIT_CAPABILITIES;
1389                                         goto fail_child;
1390                                 }
1391
1392                         if (context->no_new_privileges)
1393                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1394                                         err = -errno;
1395                                         r = EXIT_NO_NEW_PRIVILEGES;
1396                                         goto fail_child;
1397                                 }
1398
1399                         if (context->syscall_filter) {
1400                                 err = apply_seccomp(context->syscall_filter);
1401                                 if (err < 0) {
1402                                         r = EXIT_SECCOMP;
1403                                         goto fail_child;
1404                                 }
1405                         }
1406                 }
1407
1408                 if (!(our_env = new0(char*, 7))) {
1409                         err = -ENOMEM;
1410                         r = EXIT_MEMORY;
1411                         goto fail_child;
1412                 }
1413
1414                 if (n_fds > 0)
1415                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1416                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1417                                 err = -ENOMEM;
1418                                 r = EXIT_MEMORY;
1419                                 goto fail_child;
1420                         }
1421
1422                 if (home)
1423                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1424                                 err = -ENOMEM;
1425                                 r = EXIT_MEMORY;
1426                                 goto fail_child;
1427                         }
1428
1429                 if (username)
1430                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1431                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1432                                 err = -ENOMEM;
1433                                 r = EXIT_MEMORY;
1434                                 goto fail_child;
1435                         }
1436
1437                 if (is_terminal_input(context->std_input) ||
1438                     context->std_output == EXEC_OUTPUT_TTY ||
1439                     context->std_error == EXEC_OUTPUT_TTY)
1440                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1441                                 err = -ENOMEM;
1442                                 r = EXIT_MEMORY;
1443                                 goto fail_child;
1444                         }
1445
1446                 assert(n_env <= 7);
1447
1448                 if (!(final_env = strv_env_merge(
1449                                       5,
1450                                       environment,
1451                                       our_env,
1452                                       context->environment,
1453                                       files_env,
1454                                       pam_env,
1455                                       NULL))) {
1456                         err = -ENOMEM;
1457                         r = EXIT_MEMORY;
1458                         goto fail_child;
1459                 }
1460
1461                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1462                         err = -ENOMEM;
1463                         r = EXIT_MEMORY;
1464                         goto fail_child;
1465                 }
1466
1467                 final_env = strv_env_clean(final_env);
1468
1469                 execve(command->path, final_argv, final_env);
1470                 err = -errno;
1471                 r = EXIT_EXEC;
1472
1473         fail_child:
1474                 if (r != 0) {
1475                         log_open();
1476                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1477                                    "EXECUTABLE=%s", command->path,
1478                                    "MESSAGE=Failed at step %s spawning %s: %s",
1479                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1480                                           command->path, strerror(-err),
1481                                    "ERRNO=%d", -err,
1482                                    NULL);
1483                         log_close();
1484                 }
1485
1486                 _exit(r);
1487         }
1488
1489         log_struct_unit(LOG_DEBUG,
1490                    unit_id,
1491                    "MESSAGE=Forked %s as %lu",
1492                           command->path, (unsigned long) pid,
1493                    NULL);
1494
1495         /* We add the new process to the cgroup both in the child (so
1496          * that we can be sure that no user code is ever executed
1497          * outside of the cgroup) and in the parent (so that we can be
1498          * sure that when we kill the cgroup the process will be
1499          * killed too). */
1500         if (cgroup_bondings)
1501                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1502
1503         exec_status_start(&command->exec_status, pid);
1504
1505         *ret = pid;
1506         return 0;
1507 }
1508
1509 void exec_context_init(ExecContext *c) {
1510         assert(c);
1511
1512         c->umask = 0022;
1513         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1514         c->cpu_sched_policy = SCHED_OTHER;
1515         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1516         c->syslog_level_prefix = true;
1517         c->control_group_persistent = -1;
1518         c->ignore_sigpipe = true;
1519         c->timer_slack_nsec = (nsec_t) -1;
1520 }
1521
1522 void exec_context_done(ExecContext *c) {
1523         unsigned l;
1524
1525         assert(c);
1526
1527         strv_free(c->environment);
1528         c->environment = NULL;
1529
1530         strv_free(c->environment_files);
1531         c->environment_files = NULL;
1532
1533         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1534                 free(c->rlimit[l]);
1535                 c->rlimit[l] = NULL;
1536         }
1537
1538         free(c->working_directory);
1539         c->working_directory = NULL;
1540         free(c->root_directory);
1541         c->root_directory = NULL;
1542
1543         free(c->tty_path);
1544         c->tty_path = NULL;
1545
1546         free(c->tcpwrap_name);
1547         c->tcpwrap_name = NULL;
1548
1549         free(c->syslog_identifier);
1550         c->syslog_identifier = NULL;
1551
1552         free(c->user);
1553         c->user = NULL;
1554
1555         free(c->group);
1556         c->group = NULL;
1557
1558         strv_free(c->supplementary_groups);
1559         c->supplementary_groups = NULL;
1560
1561         free(c->pam_name);
1562         c->pam_name = NULL;
1563
1564         if (c->capabilities) {
1565                 cap_free(c->capabilities);
1566                 c->capabilities = NULL;
1567         }
1568
1569         strv_free(c->read_only_dirs);
1570         c->read_only_dirs = NULL;
1571
1572         strv_free(c->read_write_dirs);
1573         c->read_write_dirs = NULL;
1574
1575         strv_free(c->inaccessible_dirs);
1576         c->inaccessible_dirs = NULL;
1577
1578         if (c->cpuset)
1579                 CPU_FREE(c->cpuset);
1580
1581         free(c->utmp_id);
1582         c->utmp_id = NULL;
1583
1584         free(c->syscall_filter);
1585         c->syscall_filter = NULL;
1586 }
1587
1588 void exec_command_done(ExecCommand *c) {
1589         assert(c);
1590
1591         free(c->path);
1592         c->path = NULL;
1593
1594         strv_free(c->argv);
1595         c->argv = NULL;
1596 }
1597
1598 void exec_command_done_array(ExecCommand *c, unsigned n) {
1599         unsigned i;
1600
1601         for (i = 0; i < n; i++)
1602                 exec_command_done(c+i);
1603 }
1604
1605 void exec_command_free_list(ExecCommand *c) {
1606         ExecCommand *i;
1607
1608         while ((i = c)) {
1609                 LIST_REMOVE(ExecCommand, command, c, i);
1610                 exec_command_done(i);
1611                 free(i);
1612         }
1613 }
1614
1615 void exec_command_free_array(ExecCommand **c, unsigned n) {
1616         unsigned i;
1617
1618         for (i = 0; i < n; i++) {
1619                 exec_command_free_list(c[i]);
1620                 c[i] = NULL;
1621         }
1622 }
1623
1624 int exec_context_load_environment(const ExecContext *c, char ***l) {
1625         char **i, **r = NULL;
1626
1627         assert(c);
1628         assert(l);
1629
1630         STRV_FOREACH(i, c->environment_files) {
1631                 char *fn;
1632                 int k;
1633                 bool ignore = false;
1634                 char **p;
1635                 glob_t pglob;
1636                 int count, n;
1637
1638                 fn = *i;
1639
1640                 if (fn[0] == '-') {
1641                         ignore = true;
1642                         fn ++;
1643                 }
1644
1645                 if (!path_is_absolute(fn)) {
1646
1647                         if (ignore)
1648                                 continue;
1649
1650                         strv_free(r);
1651                         return -EINVAL;
1652                 }
1653
1654                 /* Filename supports globbing, take all matching files */
1655                 zero(pglob);
1656                 errno = 0;
1657                 if (glob(fn, 0, NULL, &pglob) != 0) {
1658                         globfree(&pglob);
1659                         if (ignore)
1660                                 continue;
1661
1662                         strv_free(r);
1663                         return errno ? -errno : -EINVAL;
1664                 }
1665                 count = pglob.gl_pathc;
1666                 if (count == 0) {
1667                         globfree(&pglob);
1668                         if (ignore)
1669                                 continue;
1670
1671                         strv_free(r);
1672                         return -EINVAL;
1673                 }
1674                 for (n = 0; n < count; n++) {
1675                         k = load_env_file(pglob.gl_pathv[n], &p);
1676                         if (k < 0) {
1677                                 if (ignore)
1678                                         continue;
1679
1680                                 strv_free(r);
1681                                 globfree(&pglob);
1682                                 return k;
1683                          }
1684
1685                         if (r == NULL)
1686                                 r = p;
1687                         else {
1688                                 char **m;
1689
1690                                 m = strv_env_merge(2, r, p);
1691                                 strv_free(r);
1692                                 strv_free(p);
1693
1694                                 if (!m) {
1695                                         globfree(&pglob);
1696                                         return -ENOMEM;
1697                                 }
1698
1699                                 r = m;
1700                         }
1701                 }
1702                 globfree(&pglob);
1703         }
1704
1705         *l = r;
1706
1707         return 0;
1708 }
1709
1710 static void strv_fprintf(FILE *f, char **l) {
1711         char **g;
1712
1713         assert(f);
1714
1715         STRV_FOREACH(g, l)
1716                 fprintf(f, " %s", *g);
1717 }
1718
1719 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1720         char ** e;
1721         unsigned i;
1722
1723         assert(c);
1724         assert(f);
1725
1726         if (!prefix)
1727                 prefix = "";
1728
1729         fprintf(f,
1730                 "%sUMask: %04o\n"
1731                 "%sWorkingDirectory: %s\n"
1732                 "%sRootDirectory: %s\n"
1733                 "%sNonBlocking: %s\n"
1734                 "%sPrivateTmp: %s\n"
1735                 "%sControlGroupModify: %s\n"
1736                 "%sControlGroupPersistent: %s\n"
1737                 "%sPrivateNetwork: %s\n"
1738                 "%sIgnoreSIGPIPE: %s\n",
1739                 prefix, c->umask,
1740                 prefix, c->working_directory ? c->working_directory : "/",
1741                 prefix, c->root_directory ? c->root_directory : "/",
1742                 prefix, yes_no(c->non_blocking),
1743                 prefix, yes_no(c->private_tmp),
1744                 prefix, yes_no(c->control_group_modify),
1745                 prefix, yes_no(c->control_group_persistent),
1746                 prefix, yes_no(c->private_network),
1747                 prefix, yes_no(c->ignore_sigpipe));
1748
1749         STRV_FOREACH(e, c->environment)
1750                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1751
1752         STRV_FOREACH(e, c->environment_files)
1753                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1754
1755         if (c->tcpwrap_name)
1756                 fprintf(f,
1757                         "%sTCPWrapName: %s\n",
1758                         prefix, c->tcpwrap_name);
1759
1760         if (c->nice_set)
1761                 fprintf(f,
1762                         "%sNice: %i\n",
1763                         prefix, c->nice);
1764
1765         if (c->oom_score_adjust_set)
1766                 fprintf(f,
1767                         "%sOOMScoreAdjust: %i\n",
1768                         prefix, c->oom_score_adjust);
1769
1770         for (i = 0; i < RLIM_NLIMITS; i++)
1771                 if (c->rlimit[i])
1772                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1773
1774         if (c->ioprio_set) {
1775                 char *class_str;
1776                 int r;
1777
1778                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1779                 if (r < 0)
1780                         class_str = NULL;
1781                 fprintf(f,
1782                         "%sIOSchedulingClass: %s\n"
1783                         "%sIOPriority: %i\n",
1784                         prefix, strna(class_str),
1785                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1786                 free(class_str);
1787         }
1788
1789         if (c->cpu_sched_set) {
1790                 char *policy_str;
1791                 int r;
1792
1793                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1794                 if (r < 0)
1795                         policy_str = NULL;
1796                 fprintf(f,
1797                         "%sCPUSchedulingPolicy: %s\n"
1798                         "%sCPUSchedulingPriority: %i\n"
1799                         "%sCPUSchedulingResetOnFork: %s\n",
1800                         prefix, strna(policy_str),
1801                         prefix, c->cpu_sched_priority,
1802                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1803                 free(policy_str);
1804         }
1805
1806         if (c->cpuset) {
1807                 fprintf(f, "%sCPUAffinity:", prefix);
1808                 for (i = 0; i < c->cpuset_ncpus; i++)
1809                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1810                                 fprintf(f, " %i", i);
1811                 fputs("\n", f);
1812         }
1813
1814         if (c->timer_slack_nsec != (nsec_t) -1)
1815                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1816
1817         fprintf(f,
1818                 "%sStandardInput: %s\n"
1819                 "%sStandardOutput: %s\n"
1820                 "%sStandardError: %s\n",
1821                 prefix, exec_input_to_string(c->std_input),
1822                 prefix, exec_output_to_string(c->std_output),
1823                 prefix, exec_output_to_string(c->std_error));
1824
1825         if (c->tty_path)
1826                 fprintf(f,
1827                         "%sTTYPath: %s\n"
1828                         "%sTTYReset: %s\n"
1829                         "%sTTYVHangup: %s\n"
1830                         "%sTTYVTDisallocate: %s\n",
1831                         prefix, c->tty_path,
1832                         prefix, yes_no(c->tty_reset),
1833                         prefix, yes_no(c->tty_vhangup),
1834                         prefix, yes_no(c->tty_vt_disallocate));
1835
1836         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1837             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1838             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1839             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1840                 char *fac_str, *lvl_str;
1841                 int r;
1842
1843                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1844                 if (r < 0)
1845                         fac_str = NULL;
1846
1847                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1848                 if (r < 0)
1849                         lvl_str = NULL;
1850
1851                 fprintf(f,
1852                         "%sSyslogFacility: %s\n"
1853                         "%sSyslogLevel: %s\n",
1854                         prefix, strna(fac_str),
1855                         prefix, strna(lvl_str));
1856                 free(lvl_str);
1857                 free(fac_str);
1858         }
1859
1860         if (c->capabilities) {
1861                 char *t;
1862                 if ((t = cap_to_text(c->capabilities, NULL))) {
1863                         fprintf(f, "%sCapabilities: %s\n",
1864                                 prefix, t);
1865                         cap_free(t);
1866                 }
1867         }
1868
1869         if (c->secure_bits)
1870                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1871                         prefix,
1872                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1873                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1874                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1875                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1876                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1877                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1878
1879         if (c->capability_bounding_set_drop) {
1880                 unsigned long l;
1881                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1882
1883                 for (l = 0; l <= cap_last_cap(); l++)
1884                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1885                                 char *t;
1886
1887                                 if ((t = cap_to_name(l))) {
1888                                         fprintf(f, " %s", t);
1889                                         cap_free(t);
1890                                 }
1891                         }
1892
1893                 fputs("\n", f);
1894         }
1895
1896         if (c->user)
1897                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1898         if (c->group)
1899                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1900
1901         if (strv_length(c->supplementary_groups) > 0) {
1902                 fprintf(f, "%sSupplementaryGroups:", prefix);
1903                 strv_fprintf(f, c->supplementary_groups);
1904                 fputs("\n", f);
1905         }
1906
1907         if (c->pam_name)
1908                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1909
1910         if (strv_length(c->read_write_dirs) > 0) {
1911                 fprintf(f, "%sReadWriteDirs:", prefix);
1912                 strv_fprintf(f, c->read_write_dirs);
1913                 fputs("\n", f);
1914         }
1915
1916         if (strv_length(c->read_only_dirs) > 0) {
1917                 fprintf(f, "%sReadOnlyDirs:", prefix);
1918                 strv_fprintf(f, c->read_only_dirs);
1919                 fputs("\n", f);
1920         }
1921
1922         if (strv_length(c->inaccessible_dirs) > 0) {
1923                 fprintf(f, "%sInaccessibleDirs:", prefix);
1924                 strv_fprintf(f, c->inaccessible_dirs);
1925                 fputs("\n", f);
1926         }
1927
1928         if (c->utmp_id)
1929                 fprintf(f,
1930                         "%sUtmpIdentifier: %s\n",
1931                         prefix, c->utmp_id);
1932 }
1933
1934 void exec_status_start(ExecStatus *s, pid_t pid) {
1935         assert(s);
1936
1937         zero(*s);
1938         s->pid = pid;
1939         dual_timestamp_get(&s->start_timestamp);
1940 }
1941
1942 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1943         assert(s);
1944
1945         if (s->pid && s->pid != pid)
1946                 zero(*s);
1947
1948         s->pid = pid;
1949         dual_timestamp_get(&s->exit_timestamp);
1950
1951         s->code = code;
1952         s->status = status;
1953
1954         if (context) {
1955                 if (context->utmp_id)
1956                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1957
1958                 exec_context_tty_reset(context);
1959         }
1960 }
1961
1962 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1963         char buf[FORMAT_TIMESTAMP_MAX];
1964
1965         assert(s);
1966         assert(f);
1967
1968         if (!prefix)
1969                 prefix = "";
1970
1971         if (s->pid <= 0)
1972                 return;
1973
1974         fprintf(f,
1975                 "%sPID: %lu\n",
1976                 prefix, (unsigned long) s->pid);
1977
1978         if (s->start_timestamp.realtime > 0)
1979                 fprintf(f,
1980                         "%sStart Timestamp: %s\n",
1981                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1982
1983         if (s->exit_timestamp.realtime > 0)
1984                 fprintf(f,
1985                         "%sExit Timestamp: %s\n"
1986                         "%sExit Code: %s\n"
1987                         "%sExit Status: %i\n",
1988                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1989                         prefix, sigchld_code_to_string(s->code),
1990                         prefix, s->status);
1991 }
1992
1993 char *exec_command_line(char **argv) {
1994         size_t k;
1995         char *n, *p, **a;
1996         bool first = true;
1997
1998         assert(argv);
1999
2000         k = 1;
2001         STRV_FOREACH(a, argv)
2002                 k += strlen(*a)+3;
2003
2004         if (!(n = new(char, k)))
2005                 return NULL;
2006
2007         p = n;
2008         STRV_FOREACH(a, argv) {
2009
2010                 if (!first)
2011                         *(p++) = ' ';
2012                 else
2013                         first = false;
2014
2015                 if (strpbrk(*a, WHITESPACE)) {
2016                         *(p++) = '\'';
2017                         p = stpcpy(p, *a);
2018                         *(p++) = '\'';
2019                 } else
2020                         p = stpcpy(p, *a);
2021
2022         }
2023
2024         *p = 0;
2025
2026         /* FIXME: this doesn't really handle arguments that have
2027          * spaces and ticks in them */
2028
2029         return n;
2030 }
2031
2032 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2033         char *p2;
2034         const char *prefix2;
2035
2036         char *cmd;
2037
2038         assert(c);
2039         assert(f);
2040
2041         if (!prefix)
2042                 prefix = "";
2043         p2 = strappend(prefix, "\t");
2044         prefix2 = p2 ? p2 : prefix;
2045
2046         cmd = exec_command_line(c->argv);
2047
2048         fprintf(f,
2049                 "%sCommand Line: %s\n",
2050                 prefix, cmd ? cmd : strerror(ENOMEM));
2051
2052         free(cmd);
2053
2054         exec_status_dump(&c->exec_status, f, prefix2);
2055
2056         free(p2);
2057 }
2058
2059 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2060         assert(f);
2061
2062         if (!prefix)
2063                 prefix = "";
2064
2065         LIST_FOREACH(command, c, c)
2066                 exec_command_dump(c, f, prefix);
2067 }
2068
2069 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2070         ExecCommand *end;
2071
2072         assert(l);
2073         assert(e);
2074
2075         if (*l) {
2076                 /* It's kind of important, that we keep the order here */
2077                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2078                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2079         } else
2080               *l = e;
2081 }
2082
2083 int exec_command_set(ExecCommand *c, const char *path, ...) {
2084         va_list ap;
2085         char **l, *p;
2086
2087         assert(c);
2088         assert(path);
2089
2090         va_start(ap, path);
2091         l = strv_new_ap(path, ap);
2092         va_end(ap);
2093
2094         if (!l)
2095                 return -ENOMEM;
2096
2097         if (!(p = strdup(path))) {
2098                 strv_free(l);
2099                 return -ENOMEM;
2100         }
2101
2102         free(c->path);
2103         c->path = p;
2104
2105         strv_free(c->argv);
2106         c->argv = l;
2107
2108         return 0;
2109 }
2110
2111 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2112         [EXEC_INPUT_NULL] = "null",
2113         [EXEC_INPUT_TTY] = "tty",
2114         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2115         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2116         [EXEC_INPUT_SOCKET] = "socket"
2117 };
2118
2119 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2120
2121 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2122         [EXEC_OUTPUT_INHERIT] = "inherit",
2123         [EXEC_OUTPUT_NULL] = "null",
2124         [EXEC_OUTPUT_TTY] = "tty",
2125         [EXEC_OUTPUT_SYSLOG] = "syslog",
2126         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2127         [EXEC_OUTPUT_KMSG] = "kmsg",
2128         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2129         [EXEC_OUTPUT_JOURNAL] = "journal",
2130         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2131         [EXEC_OUTPUT_SOCKET] = "socket"
2132 };
2133
2134 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);