chiark / gitweb /
1f6263519607eb8d3e33dad7ad8fd22afc2a9a12
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71
72 /* This assumes there is a 'tty' group */
73 #define TTY_MODE 0620
74
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3, i.e. directly after stdin/stdout/stderr.
 *
 * The array is modified in place: every entry that had to be moved is
 * replaced by the new descriptor number and the old one is closed.
 *
 * Returns 0 on success, or a negative errno-style value if
 * duplicating a descriptor failed. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, moved;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, want);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = moved;

                        /* F_DUPFD hands out the lowest free number
                         * that is >= want. If that is not the slot
                         * itself the slot is still occupied, so
                         * remember the first such index and make
                         * another pass starting from there. */
                        if (moved != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
118
/* Adjusts the file flags of all passed descriptors: O_NONBLOCK is set
 * or dropped according to 'nonblock', and FD_CLOEXEC is always
 * dropped, since these descriptors are meant to be handed over to the
 * child.
 *
 * Returns 0 on success, or the first negative error returned by one
 * of the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        while (idx < n_fds) {
                int r;

                r = fd_nonblock(fds[idx], nonblock);
                if (r < 0)
                        return r;

                r = fd_cloexec(fds[idx], false);
                if (r < 0)
                        return r;

                idx++;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
168 static bool is_terminal_output(ExecOutput o) {
169         return
170                 o == EXEC_OUTPUT_TTY ||
171                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
172                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
173                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
174 }
175
/* Opens /dev/null with the given open(2) flags (O_NOCTTY is always
 * added) and makes it available as descriptor number 'nfd'.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, rc;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already gave us the wanted number */
        if (null_fd == nfd)
                return nfd;

        /* Otherwise move it into place and drop the temporary fd,
         * regardless of whether the move worked. */
        rc = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return rc;
}
192
193 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
194         int fd, r;
195         union sockaddr_union sa;
196
197         assert(context);
198         assert(output < _EXEC_OUTPUT_MAX);
199         assert(ident);
200         assert(nfd >= 0);
201
202         fd = socket(AF_UNIX, SOCK_STREAM, 0);
203         if (fd < 0)
204                 return -errno;
205
206         zero(sa);
207         sa.un.sun_family = AF_UNIX;
208         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
209
210         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
211         if (r < 0) {
212                 close_nointr_nofail(fd);
213                 return -errno;
214         }
215
216         if (shutdown(fd, SHUT_RD) < 0) {
217                 close_nointr_nofail(fd);
218                 return -errno;
219         }
220
221         dprintf(fd,
222                 "%s\n"
223                 "%s\n"
224                 "%i\n"
225                 "%i\n"
226                 "%i\n"
227                 "%i\n"
228                 "%i\n",
229                 context->syslog_identifier ? context->syslog_identifier : ident,
230                 unit_id,
231                 context->syslog_priority,
232                 !!context->syslog_level_prefix,
233                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
234                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
235                 is_terminal_output(output));
236
237         if (fd != nfd) {
238                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
239                 close_nointr_nofail(fd);
240         } else
241                 r = nfd;
242
243         return r;
244 }
/* Opens the terminal at 'path' with the given open flags (O_NOCTTY is
 * always added) and makes it available as descriptor number 'nfd'.
 *
 * Returns nfd on success. On failure either the negative value
 * returned by open_terminal() or a negative errno-style value from
 * dup2() is returned. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, rc;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        if (tty_fd == nfd)
                return nfd;

        rc = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(tty_fd);

        return rc;
}
262
263 static bool is_terminal_input(ExecInput i) {
264         return
265                 i == EXEC_INPUT_TTY ||
266                 i == EXEC_INPUT_TTY_FORCE ||
267                 i == EXEC_INPUT_TTY_FAIL;
268 }
269
270 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
271
272         if (is_terminal_input(std_input) && !apply_tty_stdin)
273                 return EXEC_INPUT_NULL;
274
275         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
276                 return EXEC_INPUT_NULL;
277
278         return std_input;
279 }
280
281 static int fixup_output(ExecOutput std_output, int socket_fd) {
282
283         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
284                 return EXEC_OUTPUT_INHERIT;
285
286         return std_output;
287 }
288
289 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
290         ExecInput i;
291
292         assert(context);
293
294         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
295
296         switch (i) {
297
298         case EXEC_INPUT_NULL:
299                 return open_null_as(O_RDONLY, STDIN_FILENO);
300
301         case EXEC_INPUT_TTY:
302         case EXEC_INPUT_TTY_FORCE:
303         case EXEC_INPUT_TTY_FAIL: {
304                 int fd, r;
305
306                 if ((fd = acquire_terminal(
307                                      tty_path(context),
308                                      i == EXEC_INPUT_TTY_FAIL,
309                                      i == EXEC_INPUT_TTY_FORCE,
310                                      false,
311                                      (usec_t) -1)) < 0)
312                         return fd;
313
314                 if (fd != STDIN_FILENO) {
315                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
316                         close_nointr_nofail(fd);
317                 } else
318                         r = STDIN_FILENO;
319
320                 return r;
321         }
322
323         case EXEC_INPUT_SOCKET:
324                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
325
326         default:
327                 assert_not_reached("Unknown input type");
328         }
329 }
330
331 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
332         ExecOutput o;
333         ExecInput i;
334         int r;
335
336         assert(context);
337         assert(ident);
338
339         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
340         o = fixup_output(context->std_output, socket_fd);
341
342         if (fileno == STDERR_FILENO) {
343                 ExecOutput e;
344                 e = fixup_output(context->std_error, socket_fd);
345
346                 /* This expects the input and output are already set up */
347
348                 /* Don't change the stderr file descriptor if we inherit all
349                  * the way and are not on a tty */
350                 if (e == EXEC_OUTPUT_INHERIT &&
351                     o == EXEC_OUTPUT_INHERIT &&
352                     i == EXEC_INPUT_NULL &&
353                     !is_terminal_input(context->std_input) &&
354                     getppid () != 1)
355                         return fileno;
356
357                 /* Duplicate from stdout if possible */
358                 if (e == o || e == EXEC_OUTPUT_INHERIT)
359                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
360
361                 o = e;
362
363         } else if (o == EXEC_OUTPUT_INHERIT) {
364                 /* If input got downgraded, inherit the original value */
365                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
366                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
367
368                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
369                 if (i != EXEC_INPUT_NULL)
370                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
371
372                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
373                 if (getppid() != 1)
374                         return fileno;
375
376                 /* We need to open /dev/null here anew, to get the right access mode. */
377                 return open_null_as(O_WRONLY, fileno);
378         }
379
380         switch (o) {
381
382         case EXEC_OUTPUT_NULL:
383                 return open_null_as(O_WRONLY, fileno);
384
385         case EXEC_OUTPUT_TTY:
386                 if (is_terminal_input(i))
387                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
388
389                 /* We don't reset the terminal if this is just about output */
390                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
391
392         case EXEC_OUTPUT_SYSLOG:
393         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
394         case EXEC_OUTPUT_KMSG:
395         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
396         case EXEC_OUTPUT_JOURNAL:
397         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
398                 r = connect_logger_as(context, o, ident, unit_id, fileno);
399                 if (r < 0) {
400                         log_struct_unit(LOG_CRIT, unit_id,
401                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
402                                 fileno == STDOUT_FILENO ? "out" : "err",
403                                 unit_id, strerror(-r),
404                                 "ERRNO=%d", -r,
405                                 NULL);
406                         r = open_null_as(O_WRONLY, fileno);
407                 }
408                 return r;
409
410         case EXEC_OUTPUT_SOCKET:
411                 assert(socket_fd >= 0);
412                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
413
414         default:
415                 assert_not_reached("Unknown error type");
416         }
417 }
418
419 static int chown_terminal(int fd, uid_t uid) {
420         struct stat st;
421
422         assert(fd >= 0);
423
424         /* This might fail. What matters are the results. */
425         (void) fchown(fd, uid, -1);
426         (void) fchmod(fd, TTY_MODE);
427
428         if (fstat(fd, &st) < 0)
429                 return -errno;
430
431         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
432                 return -EPERM;
433
434         return 0;
435 }
436
437 static int setup_confirm_stdio(int *_saved_stdin,
438                                int *_saved_stdout) {
439         int fd = -1, saved_stdin, saved_stdout = -1, r;
440
441         assert(_saved_stdin);
442         assert(_saved_stdout);
443
444         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
445         if (saved_stdin < 0)
446                 return -errno;
447
448         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
449         if (saved_stdout < 0) {
450                 r = errno;
451                 goto fail;
452         }
453
454         fd = acquire_terminal(
455                         "/dev/console",
456                         false,
457                         false,
458                         false,
459                         DEFAULT_CONFIRM_USEC);
460         if (fd < 0) {
461                 r = fd;
462                 goto fail;
463         }
464
465         r = chown_terminal(fd, getuid());
466         if (r < 0)
467                 goto fail;
468
469         if (dup2(fd, STDIN_FILENO) < 0) {
470                 r = -errno;
471                 goto fail;
472         }
473
474         if (dup2(fd, STDOUT_FILENO) < 0) {
475                 r = -errno;
476                 goto fail;
477         }
478
479         if (fd >= 2)
480                 close_nointr_nofail(fd);
481
482         *_saved_stdin = saved_stdin;
483         *_saved_stdout = saved_stdout;
484
485         return 0;
486
487 fail:
488         if (saved_stdout >= 0)
489                 close_nointr_nofail(saved_stdout);
490
491         if (saved_stdin >= 0)
492                 close_nointr_nofail(saved_stdin);
493
494         if (fd >= 0)
495                 close_nointr_nofail(fd);
496
497         return r;
498 }
499
500 static int write_confirm_message(const char *format, ...) {
501         int fd;
502         va_list ap;
503
504         assert(format);
505
506         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
507         if (fd < 0)
508                 return fd;
509
510         va_start(ap, format);
511         vdprintf(fd, format, ap);
512         va_end(ap);
513
514         close_nointr_nofail(fd);
515
516         return 0;
517 }
518
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back in place as stdin/stdout and closes the
 * saved duplicates. Descriptors that are negative are skipped.
 *
 * Returns 0 on success. If restoring failed, the negative errno-style
 * value of the last failing dup2() is returned; the saved descriptors
 * are closed in either case. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
545
/* Asks the user on the console whether the command line 'argv' shall
 * be executed. The answer (one of the characters in "yns") is stored
 * in *response by ask().
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and are restored again on every path that got past the
 * redirection, including the out-of-memory path.
 *
 * Returns the result of ask() on success, a negative error value
 * otherwise. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdin/stdout pointing at the console
                 * and don't leak the saved descriptors. */
                r = -ENOMEM;
                goto restore;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

restore:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
565
566 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
567         bool keep_groups = false;
568         int r;
569
570         assert(context);
571
572         /* Lookup and set GID and supplementary group list. Here too
573          * we avoid NSS lookups for gid=0. */
574
575         if (context->group || username) {
576
577                 if (context->group) {
578                         const char *g = context->group;
579
580                         if ((r = get_group_creds(&g, &gid)) < 0)
581                                 return r;
582                 }
583
584                 /* First step, initialize groups from /etc/groups */
585                 if (username && gid != 0) {
586                         if (initgroups(username, gid) < 0)
587                                 return -errno;
588
589                         keep_groups = true;
590                 }
591
592                 /* Second step, set our gids */
593                 if (setresgid(gid, gid, gid) < 0)
594                         return -errno;
595         }
596
597         if (context->supplementary_groups) {
598                 int ngroups_max, k;
599                 gid_t *gids;
600                 char **i;
601
602                 /* Final step, initialize any manually set supplementary groups */
603                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
604
605                 if (!(gids = new(gid_t, ngroups_max)))
606                         return -ENOMEM;
607
608                 if (keep_groups) {
609                         if ((k = getgroups(ngroups_max, gids)) < 0) {
610                                 free(gids);
611                                 return -errno;
612                         }
613                 } else
614                         k = 0;
615
616                 STRV_FOREACH(i, context->supplementary_groups) {
617                         const char *g;
618
619                         if (k >= ngroups_max) {
620                                 free(gids);
621                                 return -E2BIG;
622                         }
623
624                         g = *i;
625                         r = get_group_creds(&g, gids+k);
626                         if (r < 0) {
627                                 free(gids);
628                                 return r;
629                         }
630
631                         k++;
632                 }
633
634                 if (setgroups(k, gids) < 0) {
635                         free(gids);
636                         return -errno;
637                 }
638
639                 free(gids);
640         }
641
642         return 0;
643 }
644
645 static int enforce_user(const ExecContext *context, uid_t uid) {
646         int r;
647         assert(context);
648
649         /* Sets (but doesn't lookup) the uid and make sure we keep the
650          * capabilities while doing so. */
651
652         if (context->capabilities) {
653                 cap_t d;
654                 static const cap_value_t bits[] = {
655                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
656                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
657                 };
658
659                 /* First step: If we need to keep capabilities but
660                  * drop privileges we need to make sure we keep our
661                  * caps, whiel we drop privileges. */
662                 if (uid != 0) {
663                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
664
665                         if (prctl(PR_GET_SECUREBITS) != sb)
666                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
667                                         return -errno;
668                 }
669
670                 /* Second step: set the capabilities. This will reduce
671                  * the capabilities to the minimum we need. */
672
673                 if (!(d = cap_dup(context->capabilities)))
674                         return -errno;
675
676                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
677                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
678                         r = -errno;
679                         cap_free(d);
680                         return r;
681                 }
682
683                 if (cap_set_proc(d) < 0) {
684                         r = -errno;
685                         cap_free(d);
686                         return r;
687                 }
688
689                 cap_free(d);
690         }
691
692         /* Third step: actually set the uids */
693         if (setresuid(uid, uid, uid) < 0)
694                 return -errno;
695
696         /* At this point we should have all necessary capabilities but
697            are otherwise a normal user. However, the caps might got
698            corrupted due to the setresuid() so we need clean them up
699            later. This is done outside of this call. */
700
701         return 0;
702 }
703
704 #ifdef HAVE_PAM
705
706 static int null_conv(
707                 int num_msg,
708                 const struct pam_message **msg,
709                 struct pam_response **resp,
710                 void *appdata_ptr) {
711
712         /* We don't support conversations */
713
714         return PAM_CONV_ERR;
715 }
716
717 static int setup_pam(
718                 const char *name,
719                 const char *user,
720                 uid_t uid,
721                 const char *tty,
722                 char ***pam_env,
723                 int fds[], unsigned n_fds) {
724
725         static const struct pam_conv conv = {
726                 .conv = null_conv,
727                 .appdata_ptr = NULL
728         };
729
730         pam_handle_t *handle = NULL;
731         sigset_t ss, old_ss;
732         int pam_code = PAM_SUCCESS;
733         int err;
734         char **e = NULL;
735         bool close_session = false;
736         pid_t pam_pid = 0, parent_pid;
737
738         assert(name);
739         assert(user);
740         assert(pam_env);
741
742         /* We set up PAM in the parent process, then fork. The child
743          * will then stay around until killed via PR_GET_PDEATHSIG or
744          * systemd via the cgroup logic. It will then remove the PAM
745          * session again. The parent process will exec() the actual
746          * daemon. We do things this way to ensure that the main PID
747          * of the daemon is the one we initially fork()ed. */
748
749         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
750                 handle = NULL;
751                 goto fail;
752         }
753
754         if (tty)
755                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
756                         goto fail;
757
758         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
759                 goto fail;
760
761         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
762                 goto fail;
763
764         close_session = true;
765
766         if ((!(e = pam_getenvlist(handle)))) {
767                 pam_code = PAM_BUF_ERR;
768                 goto fail;
769         }
770
771         /* Block SIGTERM, so that we know that it won't get lost in
772          * the child */
773         if (sigemptyset(&ss) < 0 ||
774             sigaddset(&ss, SIGTERM) < 0 ||
775             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
776                 goto fail;
777
778         parent_pid = getpid();
779
780         if ((pam_pid = fork()) < 0)
781                 goto fail;
782
783         if (pam_pid == 0) {
784                 int sig;
785                 int r = EXIT_PAM;
786
787                 /* The child's job is to reset the PAM session on
788                  * termination */
789
790                 /* This string must fit in 10 chars (i.e. the length
791                  * of "/sbin/init"), to look pretty in /bin/ps */
792                 rename_process("(sd-pam)");
793
794                 /* Make sure we don't keep open the passed fds in this
795                 child. We assume that otherwise only those fds are
796                 open here that have been opened by PAM. */
797                 close_many(fds, n_fds);
798
799                 /* Drop privileges - we don't need any to pam_close_session
800                  * and this will make PR_SET_PDEATHSIG work in most cases.
801                  * If this fails, ignore the error - but expect sd-pam threads
802                  * to fail to exit normally */
803                 if (setresuid(uid, uid, uid) < 0)
804                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
805
806                 /* Wait until our parent died. This will only work if
807                  * the above setresuid() succeeds, otherwise the kernel
808                  * will not allow unprivileged parents kill their privileged
809                  * children this way. We rely on the control groups kill logic
810                  * to do the rest for us. */
811                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
812                         goto child_finish;
813
814                 /* Check if our parent process might already have
815                  * died? */
816                 if (getppid() == parent_pid) {
817                         for (;;) {
818                                 if (sigwait(&ss, &sig) < 0) {
819                                         if (errno == EINTR)
820                                                 continue;
821
822                                         goto child_finish;
823                                 }
824
825                                 assert(sig == SIGTERM);
826                                 break;
827                         }
828                 }
829
830                 /* If our parent died we'll end the session */
831                 if (getppid() != parent_pid)
832                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
833                                 goto child_finish;
834
835                 r = 0;
836
837         child_finish:
838                 pam_end(handle, pam_code | PAM_DATA_SILENT);
839                 _exit(r);
840         }
841
842         /* If the child was forked off successfully it will do all the
843          * cleanups, so forget about the handle here. */
844         handle = NULL;
845
846         /* Unblock SIGTERM again in the parent */
847         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
848                 goto fail;
849
850         /* We close the log explicitly here, since the PAM modules
851          * might have opened it, but we don't want this fd around. */
852         closelog();
853
854         *pam_env = e;
855         e = NULL;
856
857         return 0;
858
859 fail:
860         if (pam_code != PAM_SUCCESS)
861                 err = -EPERM;  /* PAM errors do not map to errno */
862         else
863                 err = -errno;
864
865         if (handle) {
866                 if (close_session)
867                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
868
869                 pam_end(handle, pam_code | PAM_DATA_SILENT);
870         }
871
872         strv_free(e);
873
874         closelog();
875
876         if (pam_pid > 1) {
877                 kill(pam_pid, SIGTERM);
878                 kill(pam_pid, SIGCONT);
879         }
880
881         return err;
882 }
883 #endif
884
/* Renames the current process to "(NAME)", where NAME is derived from
 * the file name component of 'path'. If there is no file name the
 * process is called "(...)".
 *
 * The resulting string must fit in 10 chars (i.e. the length of
 * "/sbin/init") to look pretty in /bin/ps, hence at most eight
 * characters of the file name are used. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail rather than the head: the end of the
                 * name is usually the more interesting part, as the
                 * beginning might just be "systemd-". */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
915
916 static int apply_seccomp(uint32_t *syscall_filter) {
917         static const struct sock_filter header[] = {
918                 VALIDATE_ARCHITECTURE,
919                 EXAMINE_SYSCALL
920         };
921         static const struct sock_filter footer[] = {
922                 _KILL_PROCESS
923         };
924
925         int i;
926         unsigned n;
927         struct sock_filter *f;
928         struct sock_fprog prog;
929
930         assert(syscall_filter);
931
932         /* First: count the syscalls to check for */
933         for (i = 0, n = 0; i < syscall_max(); i++)
934                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
935                         n++;
936
937         /* Second: build the filter program from a header the syscall
938          * matches and the footer */
939         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
940         memcpy(f, header, sizeof(header));
941
942         for (i = 0, n = 0; i < syscall_max(); i++)
943                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
944                         struct sock_filter item[] = {
945                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
946                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
947                         };
948
949                         assert_cc(ELEMENTSOF(item) == 2);
950
951                         f[ELEMENTSOF(header) + 2*n]  = item[0];
952                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
953
954                         n++;
955                 }
956
957         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
958
959         /* Third: install the filter */
960         zero(prog);
961         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
962         prog.filter = f;
963         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
964                 return -errno;
965
966         return 0;
967 }
968
/* Forks off a child process, configures its execution environment as
 * described by the ExecContext and finally execve()s command->path in it.
 *
 *   command            command to run; its path is executed and its
 *                      exec_status is updated in the parent
 *   argv               argument vector; if NULL command->argv is used
 *   context            execution settings applied in the child
 *   fds, n_fds         file descriptors handed to the child. If any of
 *                      stdin/stdout/stderr is configured as socket,
 *                      exactly one fd must be passed, and it is used as
 *                      that socket
 *   environment        environment block merged into the final one
 *   apply_permissions  if false, the group, PAM, resource limit,
 *                      capability, user, securebits, no-new-privileges
 *                      and seccomp steps are skipped
 *   apply_chroot       if true chroot() into context->root_directory,
 *                      otherwise only chdir() below it
 *   apply_tty_stdin    passed on to setup_input()/setup_output()
 *   confirm_spawn      ask for confirmation before going on
 *   cgroup_bondings,
 *   cgroup_attributes,
 *   cgroup_suffix      control groups to realize and join
 *   unit_id            passed to the log calls and to setup_output()
 *   idle_pipe          optional pipe pair the child waits on before
 *                      continuing (may be NULL)
 *   ret                receives the PID of the forked child
 *
 * Returns 0 if the child was forked, a negative errno-style value if
 * something failed before or during fork(). Failures inside the child
 * are logged there, and the child exits with the EXIT_xxx code of the
 * step that failed. */
int exec_spawn(ExecCommand *command,
               char **argv,
               const ExecContext *context,
               int fds[], unsigned n_fds,
               char **environment,
               bool apply_permissions,
               bool apply_chroot,
               bool apply_tty_stdin,
               bool confirm_spawn,
               CGroupBonding *cgroup_bondings,
               CGroupAttribute *cgroup_attributes,
               const char *cgroup_suffix,
               const char *unit_id,
               int idle_pipe[2],
               pid_t *ret) {

        pid_t pid;
        int r;
        char *line;
        int socket_fd;
        char _cleanup_strv_free_ **files_env = NULL;

        assert(command);
        assert(context);
        assert(ret);
        assert(fds || n_fds <= 0);

        /* If any standard stream is connected to a socket we expect
         * exactly one fd: it is kept aside as socket_fd and the fd
         * list is emptied. */
        if (context->std_input == EXEC_INPUT_SOCKET ||
            context->std_output == EXEC_OUTPUT_SOCKET ||
            context->std_error == EXEC_OUTPUT_SOCKET) {

                if (n_fds != 1)
                        return -EINVAL;

                socket_fd = fds[0];

                fds = NULL;
                n_fds = 0;
        } else
                socket_fd = -1;

        /* Read the environment files before forking, so that a
         * failure is reported to the caller. */
        r = exec_context_load_environment(context, &files_env);
        if (r < 0) {
                log_struct_unit(LOG_ERR,
                           unit_id,
                           "MESSAGE=Failed to load environment files: %s", strerror(-r),
                           "ERRNO=%d", -r,
                           NULL);
                return r;
        }

        if (!argv)
                argv = command->argv;

        line = exec_command_line(argv);
        if (!line)
                return log_oom();

        log_struct_unit(LOG_DEBUG,
                   unit_id,
                   "MESSAGE=About to execute %s", line,
                   NULL);
        free(line);

        r = cgroup_bonding_realize_list(cgroup_bondings);
        if (r < 0)
                return r;

        cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);

        pid = fork();
        if (pid < 0)
                return -errno;

        if (pid == 0) {
                int i, err;
                sigset_t ss;
                const char *username = NULL, *home = NULL;
                uid_t uid = (uid_t) -1;
                gid_t gid = (gid_t) -1;
                char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
                        **final_env = NULL, **final_argv = NULL;
                unsigned n_env = 0;
                bool set_access = false;

                /* child */

                /* From here on every failing step stores the
                 * (negative) error in err and the exit status of the
                 * step in r, then jumps to fail_child. */

                rename_process_from_path(command->path);

                /* We reset exactly these signals, since they are the
                 * only ones we set to SIG_IGN in the main daemon. All
                 * others we leave untouched because we set them to
                 * SIG_DFL or a valid handler initially, both of which
                 * will be demoted to SIG_DFL. */
                default_signals(SIGNALS_CRASH_HANDLER,
                                SIGNALS_IGNORE, -1);

                if (context->ignore_sigpipe)
                        ignore_signals(SIGPIPE, -1);

                /* Start out with an empty signal mask */
                assert_se(sigemptyset(&ss) == 0);
                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                        err = -errno;
                        r = EXIT_SIGNAL_MASK;
                        goto fail_child;
                }

                /* Close our copy of the write end, then wait until
                 * the read end sees a hangup or the timeout elapses */
                if (idle_pipe) {
                        if (idle_pipe[1] >= 0)
                                close_nointr_nofail(idle_pipe[1]);
                        if (idle_pipe[0] >= 0) {
                                fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
                                close_nointr_nofail(idle_pipe[0]);
                        }
                }

                /* Close sockets very early to make sure we don't
                 * block init reexecution because it cannot bind its
                 * sockets */
                log_forget_fds();
                err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
                                           socket_fd >= 0 ? 1 : n_fds);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (!context->same_pgrp)
                        if (setsid() < 0) {
                                err = -errno;
                                r = EXIT_SETSID;
                                goto fail_child;
                        }

                if (context->tcpwrap_name) {
                        if (socket_fd >= 0)
                                if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }

                        for (i = 0; i < (int) n_fds; i++) {
                                if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }
                        }
                }

                exec_context_tty_reset(context);

                if (confirm_spawn) {
                        char response;

                        err = ask_for_confirmation(&response, argv);
                        if (err == -ETIMEDOUT)
                                write_confirm_message("Confirmation question timed out, assuming positive response.\n");
                        else if (err < 0)
                                write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
                        else if (response == 's') {
                                write_confirm_message("Skipping execution.\n");
                                err = -ECANCELED;
                                r = EXIT_CONFIRM;
                                goto fail_child;
                        } else if (response == 'n') {
                                /* Note that r is 0 here, hence
                                 * nothing is logged at fail_child and
                                 * the child exits with status 0 */
                                write_confirm_message("Failing execution.\n");
                                err = r = 0;
                                goto fail_child;
                        }
                }

                /* If a socket is connected to STDIN/STDOUT/STDERR, we
                 * must be sure to drop O_NONBLOCK */
                if (socket_fd >= 0)
                        fd_nonblock(socket_fd, false);

                err = setup_input(context, socket_fd, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDIN;
                        goto fail_child;
                }

                err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDOUT;
                        goto fail_child;
                }

                err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDERR;
                        goto fail_child;
                }

                /* Join the control groups from within the child, too;
                 * see the comment in the parent path below */
                if (cgroup_bondings) {
                        err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (context->oom_score_adjust_set) {
                        char t[16];

                        snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
                        char_array_0(t);

                        /* NOTE(review): this relies on errno still
                         * being valid after write_one_line_file()
                         * failed -- confirm against that helper */
                        if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
                                err = -errno;
                                r = EXIT_OOM_ADJUST;
                                goto fail_child;
                        }
                }

                if (context->nice_set)
                        if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
                                err = -errno;
                                r = EXIT_NICE;
                                goto fail_child;
                        }

                if (context->cpu_sched_set) {
                        struct sched_param param;

                        zero(param);
                        param.sched_priority = context->cpu_sched_priority;

                        if (sched_setscheduler(0, context->cpu_sched_policy |
                                               (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
                                err = -errno;
                                r = EXIT_SETSCHEDULER;
                                goto fail_child;
                        }
                }

                if (context->cpuset)
                        if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
                                err = -errno;
                                r = EXIT_CPUAFFINITY;
                                goto fail_child;
                        }

                if (context->ioprio_set)
                        if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
                                err = -errno;
                                r = EXIT_IOPRIO;
                                goto fail_child;
                        }

                /* (nsec_t) -1 means that no timer slack is configured */
                if (context->timer_slack_nsec != (nsec_t) -1)
                        if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
                                err = -errno;
                                r = EXIT_TIMERSLACK;
                                goto fail_child;
                        }

                if (context->utmp_id)
                        utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);

                /* Resolve the configured user. The credentials are
                 * only looked up here, they are applied further
                 * down. */
                if (context->user) {
                        username = context->user;
                        err = get_user_creds(&username, &uid, &gid, &home, NULL);
                        if (err < 0) {
                                r = EXIT_USER;
                                goto fail_child;
                        }

                        if (is_terminal_input(context->std_input)) {
                                err = chown_terminal(STDIN_FILENO, uid);
                                if (err < 0) {
                                        r = EXIT_STDIN;
                                        goto fail_child;
                                }
                        }

                        if (cgroup_bondings && context->control_group_modify) {
                                err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
                                if (err >= 0)
                                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
                                if (err < 0) {
                                        r = EXIT_CGROUP;
                                        goto fail_child;
                                }

                                set_access = true;
                        }
                }

                /* If the access mode wasn't set above, still pass on
                 * the persistency setting, if there is one (i.e. it
                 * is not negative) */
                if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (apply_permissions) {
                        err = enforce_groups(context, username, gid);
                        if (err < 0) {
                                r = EXIT_GROUP;
                                goto fail_child;
                        }
                }

                umask(context->umask);

#ifdef HAVE_PAM
                if (apply_permissions && context->pam_name && username) {
                        err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
                        if (err < 0) {
                                r = EXIT_PAM;
                                goto fail_child;
                        }
                }
#endif
                if (context->private_network) {
                        if (unshare(CLONE_NEWNET) < 0) {
                                err = -errno;
                                r = EXIT_NETWORK;
                                goto fail_child;
                        }

                        loopback_setup();
                }

                /* Only set up a namespace if at least one of the
                 * settings below asks for it */
                if (strv_length(context->read_write_dirs) > 0 ||
                    strv_length(context->read_only_dirs) > 0 ||
                    strv_length(context->inaccessible_dirs) > 0 ||
                    context->mount_flags != 0 ||
                    context->private_tmp) {
                        err = setup_namespace(context->read_write_dirs,
                                              context->read_only_dirs,
                                              context->inaccessible_dirs,
                                              context->private_tmp,
                                              context->mount_flags);
                        if (err < 0) {
                                r = EXIT_NAMESPACE;
                                goto fail_child;
                        }
                }

                if (apply_chroot) {
                        if (context->root_directory)
                                if (chroot(context->root_directory) < 0) {
                                        err = -errno;
                                        r = EXIT_CHROOT;
                                        goto fail_child;
                                }

                        if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                } else {
                        /* No chroot(): just change into the working
                         * directory below the root directory */
                        char _cleanup_free_ *d = NULL;

                        if (asprintf(&d, "%s/%s",
                                     context->root_directory ? context->root_directory : "",
                                     context->working_directory ? context->working_directory : "") < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                        if (chdir(d) < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                }

                /* We repeat the fd closing here, to make sure that
                 * nothing is leaked from the PAM modules */
                err = close_all_fds(fds, n_fds);
                if (err >= 0)
                        err = shift_fds(fds, n_fds);
                if (err >= 0)
                        err = flags_fds(fds, n_fds, context->non_blocking);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (apply_permissions) {

                        for (i = 0; i < RLIMIT_NLIMITS; i++) {
                                if (!context->rlimit[i])
                                        continue;

                                if (setrlimit_closest(i, context->rlimit[i]) < 0) {
                                        err = -errno;
                                        r = EXIT_LIMITS;
                                        goto fail_child;
                                }
                        }

                        if (context->capability_bounding_set_drop) {
                                err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
                                if (err < 0) {
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }
                        }

                        if (context->user) {
                                err = enforce_user(context, uid);
                                if (err < 0) {
                                        r = EXIT_USER;
                                        goto fail_child;
                                }
                        }

                        /* PR_GET_SECUREBITS is not privileged, while
                         * PR_SET_SECUREBITS is. So to suppress
                         * potential EPERMs we'll try not to call
                         * PR_SET_SECUREBITS unless necessary. */
                        if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
                                if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
                                        err = -errno;
                                        r = EXIT_SECUREBITS;
                                        goto fail_child;
                                }

                        if (context->capabilities)
                                if (cap_set_proc(context->capabilities) < 0) {
                                        err = -errno;
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }

                        if (context->no_new_privileges)
                                if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                        err = -errno;
                                        r = EXIT_NO_NEW_PRIVILEGES;
                                        goto fail_child;
                                }

                        if (context->syscall_filter) {
                                err = apply_seccomp(context->syscall_filter);
                                if (err < 0) {
                                        r = EXIT_SECCOMP;
                                        goto fail_child;
                                }
                        }
                }

                /* Room for at most six entries (LISTEN_PID,
                 * LISTEN_FDS, HOME, LOGNAME, USER and the default
                 * terminal entry) plus the terminating NULL */
                if (!(our_env = new0(char*, 7))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (n_fds > 0)
                        if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
                            asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (home)
                        if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (username)
                        if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
                            asprintf(our_env + n_env++, "USER=%s", username) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (is_terminal_input(context->std_input) ||
                    context->std_output == EXEC_OUTPUT_TTY ||
                    context->std_error == EXEC_OUTPUT_TTY)
                        if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                assert(n_env <= 7);

                /* Combine the caller's environment, the variables
                 * generated above, the ones configured in the
                 * context, the ones read from the environment files
                 * and the ones PAM handed us */
                if (!(final_env = strv_env_merge(
                                      5,
                                      environment,
                                      our_env,
                                      context->environment,
                                      files_env,
                                      pam_env,
                                      NULL))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (!(final_argv = replace_env_argv(argv, final_env))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                final_env = strv_env_clean(final_env);

                /* execve() only returns on failure */
                execve(command->path, final_argv, final_env);
                err = -errno;
                r = EXIT_EXEC;

        fail_child:
                /* Common exit path of the child: log which step
                 * failed (unless r is 0) and exit with r as status */
                if (r != 0) {
                        log_open();
                        log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
                                   "EXECUTABLE=%s", command->path,
                                   "MESSAGE=Failed at step %s spawning %s: %s",
                                          exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
                                          command->path, strerror(-err),
                                   "ERRNO=%d", -err,
                                   NULL);
                        log_close();
                }

                _exit(r);
        }

        /* parent */

        log_struct_unit(LOG_DEBUG,
                   unit_id,
                   "MESSAGE=Forked %s as %lu",
                          command->path, (unsigned long) pid,
                   NULL);

        /* We add the new process to the cgroup both in the child (so
         * that we can be sure that no user code is ever executed
         * outside of the cgroup) and in the parent (so that we can be
         * sure that when we kill the cgroup the process will be
         * killed too). */
        if (cgroup_bondings)
                cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);

        exec_status_start(&command->exec_status, pid);

        *ret = pid;
        return 0;
}
1519
1520 void exec_context_init(ExecContext *c) {
1521         assert(c);
1522
1523         c->umask = 0022;
1524         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1525         c->cpu_sched_policy = SCHED_OTHER;
1526         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1527         c->syslog_level_prefix = true;
1528         c->control_group_persistent = -1;
1529         c->ignore_sigpipe = true;
1530         c->timer_slack_nsec = (nsec_t) -1;
1531 }
1532
1533 void exec_context_done(ExecContext *c) {
1534         unsigned l;
1535
1536         assert(c);
1537
1538         strv_free(c->environment);
1539         c->environment = NULL;
1540
1541         strv_free(c->environment_files);
1542         c->environment_files = NULL;
1543
1544         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1545                 free(c->rlimit[l]);
1546                 c->rlimit[l] = NULL;
1547         }
1548
1549         free(c->working_directory);
1550         c->working_directory = NULL;
1551         free(c->root_directory);
1552         c->root_directory = NULL;
1553
1554         free(c->tty_path);
1555         c->tty_path = NULL;
1556
1557         free(c->tcpwrap_name);
1558         c->tcpwrap_name = NULL;
1559
1560         free(c->syslog_identifier);
1561         c->syslog_identifier = NULL;
1562
1563         free(c->user);
1564         c->user = NULL;
1565
1566         free(c->group);
1567         c->group = NULL;
1568
1569         strv_free(c->supplementary_groups);
1570         c->supplementary_groups = NULL;
1571
1572         free(c->pam_name);
1573         c->pam_name = NULL;
1574
1575         if (c->capabilities) {
1576                 cap_free(c->capabilities);
1577                 c->capabilities = NULL;
1578         }
1579
1580         strv_free(c->read_only_dirs);
1581         c->read_only_dirs = NULL;
1582
1583         strv_free(c->read_write_dirs);
1584         c->read_write_dirs = NULL;
1585
1586         strv_free(c->inaccessible_dirs);
1587         c->inaccessible_dirs = NULL;
1588
1589         if (c->cpuset)
1590                 CPU_FREE(c->cpuset);
1591
1592         free(c->utmp_id);
1593         c->utmp_id = NULL;
1594
1595         free(c->syscall_filter);
1596         c->syscall_filter = NULL;
1597 }
1598
1599 void exec_command_done(ExecCommand *c) {
1600         assert(c);
1601
1602         free(c->path);
1603         c->path = NULL;
1604
1605         strv_free(c->argv);
1606         c->argv = NULL;
1607 }
1608
1609 void exec_command_done_array(ExecCommand *c, unsigned n) {
1610         unsigned i;
1611
1612         for (i = 0; i < n; i++)
1613                 exec_command_done(c+i);
1614 }
1615
1616 void exec_command_free_list(ExecCommand *c) {
1617         ExecCommand *i;
1618
1619         while ((i = c)) {
1620                 LIST_REMOVE(ExecCommand, command, c, i);
1621                 exec_command_done(i);
1622                 free(i);
1623         }
1624 }
1625
1626 void exec_command_free_array(ExecCommand **c, unsigned n) {
1627         unsigned i;
1628
1629         for (i = 0; i < n; i++) {
1630                 exec_command_free_list(c[i]);
1631                 c[i] = NULL;
1632         }
1633 }
1634
1635 int exec_context_load_environment(const ExecContext *c, char ***l) {
1636         char **i, **r = NULL;
1637
1638         assert(c);
1639         assert(l);
1640
1641         STRV_FOREACH(i, c->environment_files) {
1642                 char *fn;
1643                 int k;
1644                 bool ignore = false;
1645                 char **p;
1646                 glob_t pglob;
1647                 int count, n;
1648
1649                 fn = *i;
1650
1651                 if (fn[0] == '-') {
1652                         ignore = true;
1653                         fn ++;
1654                 }
1655
1656                 if (!path_is_absolute(fn)) {
1657
1658                         if (ignore)
1659                                 continue;
1660
1661                         strv_free(r);
1662                         return -EINVAL;
1663                 }
1664
1665                 /* Filename supports globbing, take all matching files */
1666                 zero(pglob);
1667                 errno = 0;
1668                 if (glob(fn, 0, NULL, &pglob) != 0) {
1669                         globfree(&pglob);
1670                         if (ignore)
1671                                 continue;
1672
1673                         strv_free(r);
1674                         return errno ? -errno : -EINVAL;
1675                 }
1676                 count = pglob.gl_pathc;
1677                 if (count == 0) {
1678                         globfree(&pglob);
1679                         if (ignore)
1680                                 continue;
1681
1682                         strv_free(r);
1683                         return -EINVAL;
1684                 }
1685                 for (n = 0; n < count; n++) {
1686                         k = load_env_file(pglob.gl_pathv[n], &p);
1687                         if (k < 0) {
1688                                 if (ignore)
1689                                         continue;
1690
1691                                 strv_free(r);
1692                                 globfree(&pglob);
1693                                 return k;
1694                          }
1695
1696                         if (r == NULL)
1697                                 r = p;
1698                         else {
1699                                 char **m;
1700
1701                                 m = strv_env_merge(2, r, p);
1702                                 strv_free(r);
1703                                 strv_free(p);
1704
1705                                 if (!m) {
1706                                         globfree(&pglob);
1707                                         return -ENOMEM;
1708                                 }
1709
1710                                 r = m;
1711                         }
1712                 }
1713                 globfree(&pglob);
1714         }
1715
1716         *l = r;
1717
1718         return 0;
1719 }
1720
1721 static void strv_fprintf(FILE *f, char **l) {
1722         char **g;
1723
1724         assert(f);
1725
1726         STRV_FOREACH(g, l)
1727                 fprintf(f, " %s", *g);
1728 }
1729
1730 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1731         char ** e;
1732         unsigned i;
1733
1734         assert(c);
1735         assert(f);
1736
1737         if (!prefix)
1738                 prefix = "";
1739
1740         fprintf(f,
1741                 "%sUMask: %04o\n"
1742                 "%sWorkingDirectory: %s\n"
1743                 "%sRootDirectory: %s\n"
1744                 "%sNonBlocking: %s\n"
1745                 "%sPrivateTmp: %s\n"
1746                 "%sControlGroupModify: %s\n"
1747                 "%sControlGroupPersistent: %s\n"
1748                 "%sPrivateNetwork: %s\n"
1749                 "%sIgnoreSIGPIPE: %s\n",
1750                 prefix, c->umask,
1751                 prefix, c->working_directory ? c->working_directory : "/",
1752                 prefix, c->root_directory ? c->root_directory : "/",
1753                 prefix, yes_no(c->non_blocking),
1754                 prefix, yes_no(c->private_tmp),
1755                 prefix, yes_no(c->control_group_modify),
1756                 prefix, yes_no(c->control_group_persistent),
1757                 prefix, yes_no(c->private_network),
1758                 prefix, yes_no(c->ignore_sigpipe));
1759
1760         STRV_FOREACH(e, c->environment)
1761                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1762
1763         STRV_FOREACH(e, c->environment_files)
1764                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1765
1766         if (c->tcpwrap_name)
1767                 fprintf(f,
1768                         "%sTCPWrapName: %s\n",
1769                         prefix, c->tcpwrap_name);
1770
1771         if (c->nice_set)
1772                 fprintf(f,
1773                         "%sNice: %i\n",
1774                         prefix, c->nice);
1775
1776         if (c->oom_score_adjust_set)
1777                 fprintf(f,
1778                         "%sOOMScoreAdjust: %i\n",
1779                         prefix, c->oom_score_adjust);
1780
1781         for (i = 0; i < RLIM_NLIMITS; i++)
1782                 if (c->rlimit[i])
1783                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1784
1785         if (c->ioprio_set) {
1786                 char *class_str;
1787                 int r;
1788
1789                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1790                 if (r < 0)
1791                         class_str = NULL;
1792                 fprintf(f,
1793                         "%sIOSchedulingClass: %s\n"
1794                         "%sIOPriority: %i\n",
1795                         prefix, strna(class_str),
1796                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1797                 free(class_str);
1798         }
1799
1800         if (c->cpu_sched_set) {
1801                 char *policy_str;
1802                 int r;
1803
1804                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1805                 if (r < 0)
1806                         policy_str = NULL;
1807                 fprintf(f,
1808                         "%sCPUSchedulingPolicy: %s\n"
1809                         "%sCPUSchedulingPriority: %i\n"
1810                         "%sCPUSchedulingResetOnFork: %s\n",
1811                         prefix, strna(policy_str),
1812                         prefix, c->cpu_sched_priority,
1813                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1814                 free(policy_str);
1815         }
1816
1817         if (c->cpuset) {
1818                 fprintf(f, "%sCPUAffinity:", prefix);
1819                 for (i = 0; i < c->cpuset_ncpus; i++)
1820                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1821                                 fprintf(f, " %i", i);
1822                 fputs("\n", f);
1823         }
1824
1825         if (c->timer_slack_nsec != (nsec_t) -1)
1826                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1827
1828         fprintf(f,
1829                 "%sStandardInput: %s\n"
1830                 "%sStandardOutput: %s\n"
1831                 "%sStandardError: %s\n",
1832                 prefix, exec_input_to_string(c->std_input),
1833                 prefix, exec_output_to_string(c->std_output),
1834                 prefix, exec_output_to_string(c->std_error));
1835
1836         if (c->tty_path)
1837                 fprintf(f,
1838                         "%sTTYPath: %s\n"
1839                         "%sTTYReset: %s\n"
1840                         "%sTTYVHangup: %s\n"
1841                         "%sTTYVTDisallocate: %s\n",
1842                         prefix, c->tty_path,
1843                         prefix, yes_no(c->tty_reset),
1844                         prefix, yes_no(c->tty_vhangup),
1845                         prefix, yes_no(c->tty_vt_disallocate));
1846
1847         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1848             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1849             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1850             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1851                 char *fac_str, *lvl_str;
1852                 int r;
1853
1854                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1855                 if (r < 0)
1856                         fac_str = NULL;
1857
1858                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1859                 if (r < 0)
1860                         lvl_str = NULL;
1861
1862                 fprintf(f,
1863                         "%sSyslogFacility: %s\n"
1864                         "%sSyslogLevel: %s\n",
1865                         prefix, strna(fac_str),
1866                         prefix, strna(lvl_str));
1867                 free(lvl_str);
1868                 free(fac_str);
1869         }
1870
1871         if (c->capabilities) {
1872                 char *t;
1873                 if ((t = cap_to_text(c->capabilities, NULL))) {
1874                         fprintf(f, "%sCapabilities: %s\n",
1875                                 prefix, t);
1876                         cap_free(t);
1877                 }
1878         }
1879
1880         if (c->secure_bits)
1881                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1882                         prefix,
1883                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1884                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1885                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1886                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1887                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1888                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1889
1890         if (c->capability_bounding_set_drop) {
1891                 unsigned long l;
1892                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1893
1894                 for (l = 0; l <= cap_last_cap(); l++)
1895                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1896                                 char *t;
1897
1898                                 if ((t = cap_to_name(l))) {
1899                                         fprintf(f, " %s", t);
1900                                         cap_free(t);
1901                                 }
1902                         }
1903
1904                 fputs("\n", f);
1905         }
1906
1907         if (c->user)
1908                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1909         if (c->group)
1910                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1911
1912         if (strv_length(c->supplementary_groups) > 0) {
1913                 fprintf(f, "%sSupplementaryGroups:", prefix);
1914                 strv_fprintf(f, c->supplementary_groups);
1915                 fputs("\n", f);
1916         }
1917
1918         if (c->pam_name)
1919                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1920
1921         if (strv_length(c->read_write_dirs) > 0) {
1922                 fprintf(f, "%sReadWriteDirs:", prefix);
1923                 strv_fprintf(f, c->read_write_dirs);
1924                 fputs("\n", f);
1925         }
1926
1927         if (strv_length(c->read_only_dirs) > 0) {
1928                 fprintf(f, "%sReadOnlyDirs:", prefix);
1929                 strv_fprintf(f, c->read_only_dirs);
1930                 fputs("\n", f);
1931         }
1932
1933         if (strv_length(c->inaccessible_dirs) > 0) {
1934                 fprintf(f, "%sInaccessibleDirs:", prefix);
1935                 strv_fprintf(f, c->inaccessible_dirs);
1936                 fputs("\n", f);
1937         }
1938
1939         if (c->utmp_id)
1940                 fprintf(f,
1941                         "%sUtmpIdentifier: %s\n",
1942                         prefix, c->utmp_id);
1943 }
1944
1945 void exec_status_start(ExecStatus *s, pid_t pid) {
1946         assert(s);
1947
1948         zero(*s);
1949         s->pid = pid;
1950         dual_timestamp_get(&s->start_timestamp);
1951 }
1952
1953 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1954         assert(s);
1955
1956         if (s->pid && s->pid != pid)
1957                 zero(*s);
1958
1959         s->pid = pid;
1960         dual_timestamp_get(&s->exit_timestamp);
1961
1962         s->code = code;
1963         s->status = status;
1964
1965         if (context) {
1966                 if (context->utmp_id)
1967                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1968
1969                 exec_context_tty_reset(context);
1970         }
1971 }
1972
1973 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1974         char buf[FORMAT_TIMESTAMP_MAX];
1975
1976         assert(s);
1977         assert(f);
1978
1979         if (!prefix)
1980                 prefix = "";
1981
1982         if (s->pid <= 0)
1983                 return;
1984
1985         fprintf(f,
1986                 "%sPID: %lu\n",
1987                 prefix, (unsigned long) s->pid);
1988
1989         if (s->start_timestamp.realtime > 0)
1990                 fprintf(f,
1991                         "%sStart Timestamp: %s\n",
1992                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1993
1994         if (s->exit_timestamp.realtime > 0)
1995                 fprintf(f,
1996                         "%sExit Timestamp: %s\n"
1997                         "%sExit Code: %s\n"
1998                         "%sExit Status: %i\n",
1999                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2000                         prefix, sigchld_code_to_string(s->code),
2001                         prefix, s->status);
2002 }
2003
2004 char *exec_command_line(char **argv) {
2005         size_t k;
2006         char *n, *p, **a;
2007         bool first = true;
2008
2009         assert(argv);
2010
2011         k = 1;
2012         STRV_FOREACH(a, argv)
2013                 k += strlen(*a)+3;
2014
2015         if (!(n = new(char, k)))
2016                 return NULL;
2017
2018         p = n;
2019         STRV_FOREACH(a, argv) {
2020
2021                 if (!first)
2022                         *(p++) = ' ';
2023                 else
2024                         first = false;
2025
2026                 if (strpbrk(*a, WHITESPACE)) {
2027                         *(p++) = '\'';
2028                         p = stpcpy(p, *a);
2029                         *(p++) = '\'';
2030                 } else
2031                         p = stpcpy(p, *a);
2032
2033         }
2034
2035         *p = 0;
2036
2037         /* FIXME: this doesn't really handle arguments that have
2038          * spaces and ticks in them */
2039
2040         return n;
2041 }
2042
2043 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2044         char *p2;
2045         const char *prefix2;
2046
2047         char *cmd;
2048
2049         assert(c);
2050         assert(f);
2051
2052         if (!prefix)
2053                 prefix = "";
2054         p2 = strappend(prefix, "\t");
2055         prefix2 = p2 ? p2 : prefix;
2056
2057         cmd = exec_command_line(c->argv);
2058
2059         fprintf(f,
2060                 "%sCommand Line: %s\n",
2061                 prefix, cmd ? cmd : strerror(ENOMEM));
2062
2063         free(cmd);
2064
2065         exec_status_dump(&c->exec_status, f, prefix2);
2066
2067         free(p2);
2068 }
2069
2070 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2071         assert(f);
2072
2073         if (!prefix)
2074                 prefix = "";
2075
2076         LIST_FOREACH(command, c, c)
2077                 exec_command_dump(c, f, prefix);
2078 }
2079
2080 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2081         ExecCommand *end;
2082
2083         assert(l);
2084         assert(e);
2085
2086         if (*l) {
2087                 /* It's kind of important, that we keep the order here */
2088                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2089                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2090         } else
2091               *l = e;
2092 }
2093
2094 int exec_command_set(ExecCommand *c, const char *path, ...) {
2095         va_list ap;
2096         char **l, *p;
2097
2098         assert(c);
2099         assert(path);
2100
2101         va_start(ap, path);
2102         l = strv_new_ap(path, ap);
2103         va_end(ap);
2104
2105         if (!l)
2106                 return -ENOMEM;
2107
2108         if (!(p = strdup(path))) {
2109                 strv_free(l);
2110                 return -ENOMEM;
2111         }
2112
2113         free(c->path);
2114         c->path = p;
2115
2116         strv_free(c->argv);
2117         c->argv = l;
2118
2119         return 0;
2120 }
2121
/* String names of the ExecInput values, indexed by the enum constant. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably generates the string lookup helpers for
 * ExecInput from the table above (exec_input_to_string() is called
 * elsewhere in this file) -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2131
/* String names of the ExecOutput values, indexed by the enum constant.
 * The "+console" names belong to the *_AND_CONSOLE variants. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably generates the string lookup helpers for
 * ExecOutput from the table above (exec_output_to_string() is called
 * elsewhere in this file) -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);