chiark / gitweb /
92cf1746417e5ab14fd517142f54b8dee3b1d843
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71
72 /* This assumes there is a 'tty' group */
73 #define TTY_MODE 0620
74
/* Moves the n_fds descriptors in fds so that entry k ends up as descriptor
 * k+3. The array is updated in place with the new descriptor numbers.
 * Returns 0 on success, or a negative errno value if fcntl() fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        const int count = (int) n_fds;
        int first = 0;
        int retry_at;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! (sorts it) */

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < count; idx++) {
                        const int wanted = idx + 3;
                        int dup;

                        /* Nothing to do if the fd already sits at its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup;

                        /* The slot we wanted was still taken: remember the
                         * first such index so the next pass resumes there */
                        if (dup != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
118
/* Drops/sets O_NONBLOCK (according to nonblock) and drops FD_CLOEXEC on
 * each of the n_fds descriptors in fds. Returns 0 on success, or the first
 * negative value returned by fd_nonblock() or fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        while (idx < n_fds) {
                int rc;

                rc = fd_nonblock(fds[idx], nonblock);
                if (rc < 0)
                        return rc;

                /* FD_CLOEXEC is always cleared: these fds are meant to be
                 * handed over to the children we spawn */
                rc = fd_cloexec(fds[idx], false);
                if (rc < 0)
                        return rc;

                idx++;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
168 static bool is_terminal_output(ExecOutput o) {
169         return
170                 o == EXEC_OUTPUT_TTY ||
171                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
172                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
173                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
174 }
175
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Already landed on the requested descriptor */
        if (null_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(null_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(null_fd);

        return result;
}
192
193 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
194         int fd, r;
195         union sockaddr_union sa;
196
197         assert(context);
198         assert(output < _EXEC_OUTPUT_MAX);
199         assert(ident);
200         assert(nfd >= 0);
201
202         fd = socket(AF_UNIX, SOCK_STREAM, 0);
203         if (fd < 0)
204                 return -errno;
205
206         zero(sa);
207         sa.un.sun_family = AF_UNIX;
208         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
209
210         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
211         if (r < 0) {
212                 close_nointr_nofail(fd);
213                 return -errno;
214         }
215
216         if (shutdown(fd, SHUT_RD) < 0) {
217                 close_nointr_nofail(fd);
218                 return -errno;
219         }
220
221         dprintf(fd,
222                 "%s\n"
223                 "%s\n"
224                 "%i\n"
225                 "%i\n"
226                 "%i\n"
227                 "%i\n"
228                 "%i\n",
229                 context->syslog_identifier ? context->syslog_identifier : ident,
230                 unit_id,
231                 context->syslog_priority,
232                 !!context->syslog_level_prefix,
233                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
234                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
235                 is_terminal_output(output));
236
237         if (fd != nfd) {
238                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
239                 close_nointr_nofail(fd);
240         } else
241                 r = nfd;
242
243         return r;
244 }
/* Opens the terminal at path via open_terminal() (mode plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, a negative
 * error value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        /* Already landed on the requested descriptor */
        if (term_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(term_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(term_fd);

        return result;
}
262
263 static bool is_terminal_input(ExecInput i) {
264         return
265                 i == EXEC_INPUT_TTY ||
266                 i == EXEC_INPUT_TTY_FORCE ||
267                 i == EXEC_INPUT_TTY_FAIL;
268 }
269
270 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
271
272         if (is_terminal_input(std_input) && !apply_tty_stdin)
273                 return EXEC_INPUT_NULL;
274
275         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
276                 return EXEC_INPUT_NULL;
277
278         return std_input;
279 }
280
281 static int fixup_output(ExecOutput std_output, int socket_fd) {
282
283         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
284                 return EXEC_OUTPUT_INHERIT;
285
286         return std_output;
287 }
288
289 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
290         ExecInput i;
291
292         assert(context);
293
294         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
295
296         switch (i) {
297
298         case EXEC_INPUT_NULL:
299                 return open_null_as(O_RDONLY, STDIN_FILENO);
300
301         case EXEC_INPUT_TTY:
302         case EXEC_INPUT_TTY_FORCE:
303         case EXEC_INPUT_TTY_FAIL: {
304                 int fd, r;
305
306                 if ((fd = acquire_terminal(
307                                      tty_path(context),
308                                      i == EXEC_INPUT_TTY_FAIL,
309                                      i == EXEC_INPUT_TTY_FORCE,
310                                      false,
311                                      (usec_t) -1)) < 0)
312                         return fd;
313
314                 if (fd != STDIN_FILENO) {
315                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
316                         close_nointr_nofail(fd);
317                 } else
318                         r = STDIN_FILENO;
319
320                 return r;
321         }
322
323         case EXEC_INPUT_SOCKET:
324                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
325
326         default:
327                 assert_not_reached("Unknown input type");
328         }
329 }
330
331 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
332         ExecOutput o;
333         ExecInput i;
334         int r;
335
336         assert(context);
337         assert(ident);
338
339         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
340         o = fixup_output(context->std_output, socket_fd);
341
342         if (fileno == STDERR_FILENO) {
343                 ExecOutput e;
344                 e = fixup_output(context->std_error, socket_fd);
345
346                 /* This expects the input and output are already set up */
347
348                 /* Don't change the stderr file descriptor if we inherit all
349                  * the way and are not on a tty */
350                 if (e == EXEC_OUTPUT_INHERIT &&
351                     o == EXEC_OUTPUT_INHERIT &&
352                     i == EXEC_INPUT_NULL &&
353                     !is_terminal_input(context->std_input) &&
354                     getppid () != 1)
355                         return fileno;
356
357                 /* Duplicate from stdout if possible */
358                 if (e == o || e == EXEC_OUTPUT_INHERIT)
359                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
360
361                 o = e;
362
363         } else if (o == EXEC_OUTPUT_INHERIT) {
364                 /* If input got downgraded, inherit the original value */
365                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
366                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
367
368                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
369                 if (i != EXEC_INPUT_NULL)
370                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
371
372                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
373                 if (getppid() != 1)
374                         return fileno;
375
376                 /* We need to open /dev/null here anew, to get the right access mode. */
377                 return open_null_as(O_WRONLY, fileno);
378         }
379
380         switch (o) {
381
382         case EXEC_OUTPUT_NULL:
383                 return open_null_as(O_WRONLY, fileno);
384
385         case EXEC_OUTPUT_TTY:
386                 if (is_terminal_input(i))
387                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
388
389                 /* We don't reset the terminal if this is just about output */
390                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
391
392         case EXEC_OUTPUT_SYSLOG:
393         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
394         case EXEC_OUTPUT_KMSG:
395         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
396         case EXEC_OUTPUT_JOURNAL:
397         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
398                 r = connect_logger_as(context, o, ident, unit_id, fileno);
399                 if (r < 0) {
400                         log_struct_unit(LOG_CRIT, unit_id,
401                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
402                                 fileno == STDOUT_FILENO ? "out" : "err",
403                                 unit_id, strerror(-r),
404                                 "ERRNO=%d", -r,
405                                 NULL);
406                         r = open_null_as(O_WRONLY, fileno);
407                 }
408                 return r;
409
410         case EXEC_OUTPUT_SOCKET:
411                 assert(socket_fd >= 0);
412                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
413
414         default:
415                 assert_not_reached("Unknown error type");
416         }
417 }
418
419 static int chown_terminal(int fd, uid_t uid) {
420         struct stat st;
421
422         assert(fd >= 0);
423
424         /* This might fail. What matters are the results. */
425         (void) fchown(fd, uid, -1);
426         (void) fchmod(fd, TTY_MODE);
427
428         if (fstat(fd, &st) < 0)
429                 return -errno;
430
431         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
432                 return -EPERM;
433
434         return 0;
435 }
436
437 static int setup_confirm_stdio(int *_saved_stdin,
438                                int *_saved_stdout) {
439         int fd = -1, saved_stdin, saved_stdout = -1, r;
440
441         assert(_saved_stdin);
442         assert(_saved_stdout);
443
444         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
445         if (saved_stdin < 0)
446                 return -errno;
447
448         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
449         if (saved_stdout < 0) {
450                 r = errno;
451                 goto fail;
452         }
453
454         fd = acquire_terminal(
455                         "/dev/console",
456                         false,
457                         false,
458                         false,
459                         DEFAULT_CONFIRM_USEC);
460         if (fd < 0) {
461                 r = fd;
462                 goto fail;
463         }
464
465         r = chown_terminal(fd, getuid());
466         if (r < 0)
467                 goto fail;
468
469         if (dup2(fd, STDIN_FILENO) < 0) {
470                 r = -errno;
471                 goto fail;
472         }
473
474         if (dup2(fd, STDOUT_FILENO) < 0) {
475                 r = -errno;
476                 goto fail;
477         }
478
479         if (fd >= 2)
480                 close_nointr_nofail(fd);
481
482         *_saved_stdin = saved_stdin;
483         *_saved_stdout = saved_stdout;
484
485         return 0;
486
487 fail:
488         if (saved_stdout >= 0)
489                 close_nointr_nofail(saved_stdout);
490
491         if (saved_stdin >= 0)
492                 close_nointr_nofail(saved_stdin);
493
494         if (fd >= 0)
495                 close_nointr_nofail(fd);
496
497         return r;
498 }
499
500 static int write_confirm_message(const char *format, ...) {
501         int fd;
502         va_list ap;
503
504         assert(format);
505
506         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
507         if (fd < 0)
508                 return fd;
509
510         va_start(ap, format);
511         vdprintf(fd, format, ap);
512         va_end(ap);
513
514         close_nointr_nofail(fd);
515
516         return 0;
517 }
518
/* Releases the terminal and puts the saved stdin/stdout descriptors back
 * in place, closing the saved copies afterwards. Negative saved
 * descriptors are skipped. Returns 0 on success, or the negative errno of
 * the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int in_fd, out_fd, result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                result = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                result = -errno;

        /* The saved copies are no longer needed, whether or not the
         * restore worked */
        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return result;
}
545
/* Asks on the console whether the command line argv shall be executed and
 * stores the answer character (one of "yns") in *response.
 *
 * stdin/stdout are temporarily redirected to the console and restored
 * before returning, on the error path too. Returns the result of ask(), or
 * a negative error value if the console could not be set up or memory ran
 * out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdin/stdout pointing at the console and
                 * don't leak the saved descriptors */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
565
566 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
567         bool keep_groups = false;
568         int r;
569
570         assert(context);
571
572         /* Lookup and set GID and supplementary group list. Here too
573          * we avoid NSS lookups for gid=0. */
574
575         if (context->group || username) {
576
577                 if (context->group) {
578                         const char *g = context->group;
579
580                         if ((r = get_group_creds(&g, &gid)) < 0)
581                                 return r;
582                 }
583
584                 /* First step, initialize groups from /etc/groups */
585                 if (username && gid != 0) {
586                         if (initgroups(username, gid) < 0)
587                                 return -errno;
588
589                         keep_groups = true;
590                 }
591
592                 /* Second step, set our gids */
593                 if (setresgid(gid, gid, gid) < 0)
594                         return -errno;
595         }
596
597         if (context->supplementary_groups) {
598                 int ngroups_max, k;
599                 gid_t *gids;
600                 char **i;
601
602                 /* Final step, initialize any manually set supplementary groups */
603                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
604
605                 if (!(gids = new(gid_t, ngroups_max)))
606                         return -ENOMEM;
607
608                 if (keep_groups) {
609                         if ((k = getgroups(ngroups_max, gids)) < 0) {
610                                 free(gids);
611                                 return -errno;
612                         }
613                 } else
614                         k = 0;
615
616                 STRV_FOREACH(i, context->supplementary_groups) {
617                         const char *g;
618
619                         if (k >= ngroups_max) {
620                                 free(gids);
621                                 return -E2BIG;
622                         }
623
624                         g = *i;
625                         r = get_group_creds(&g, gids+k);
626                         if (r < 0) {
627                                 free(gids);
628                                 return r;
629                         }
630
631                         k++;
632                 }
633
634                 if (setgroups(k, gids) < 0) {
635                         free(gids);
636                         return -errno;
637                 }
638
639                 free(gids);
640         }
641
642         return 0;
643 }
644
645 static int enforce_user(const ExecContext *context, uid_t uid) {
646         int r;
647         assert(context);
648
649         /* Sets (but doesn't lookup) the uid and make sure we keep the
650          * capabilities while doing so. */
651
652         if (context->capabilities) {
653                 cap_t d;
654                 static const cap_value_t bits[] = {
655                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
656                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
657                 };
658
659                 /* First step: If we need to keep capabilities but
660                  * drop privileges we need to make sure we keep our
661                  * caps, whiel we drop privileges. */
662                 if (uid != 0) {
663                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
664
665                         if (prctl(PR_GET_SECUREBITS) != sb)
666                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
667                                         return -errno;
668                 }
669
670                 /* Second step: set the capabilities. This will reduce
671                  * the capabilities to the minimum we need. */
672
673                 if (!(d = cap_dup(context->capabilities)))
674                         return -errno;
675
676                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
677                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
678                         r = -errno;
679                         cap_free(d);
680                         return r;
681                 }
682
683                 if (cap_set_proc(d) < 0) {
684                         r = -errno;
685                         cap_free(d);
686                         return r;
687                 }
688
689                 cap_free(d);
690         }
691
692         /* Third step: actually set the uids */
693         if (setresuid(uid, uid, uid) < 0)
694                 return -errno;
695
696         /* At this point we should have all necessary capabilities but
697            are otherwise a normal user. However, the caps might got
698            corrupted due to the setresuid() so we need clean them up
699            later. This is done outside of this call. */
700
701         return 0;
702 }
703
704 #ifdef HAVE_PAM
705
706 static int null_conv(
707                 int num_msg,
708                 const struct pam_message **msg,
709                 struct pam_response **resp,
710                 void *appdata_ptr) {
711
712         /* We don't support conversations */
713
714         return PAM_CONV_ERR;
715 }
716
717 static int setup_pam(
718                 const char *name,
719                 const char *user,
720                 uid_t uid,
721                 const char *tty,
722                 char ***pam_env,
723                 int fds[], unsigned n_fds) {
724
725         static const struct pam_conv conv = {
726                 .conv = null_conv,
727                 .appdata_ptr = NULL
728         };
729
730         pam_handle_t *handle = NULL;
731         sigset_t ss, old_ss;
732         int pam_code = PAM_SUCCESS;
733         int err;
734         char **e = NULL;
735         bool close_session = false;
736         pid_t pam_pid = 0, parent_pid;
737
738         assert(name);
739         assert(user);
740         assert(pam_env);
741
742         /* We set up PAM in the parent process, then fork. The child
743          * will then stay around until killed via PR_GET_PDEATHSIG or
744          * systemd via the cgroup logic. It will then remove the PAM
745          * session again. The parent process will exec() the actual
746          * daemon. We do things this way to ensure that the main PID
747          * of the daemon is the one we initially fork()ed. */
748
749         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
750                 handle = NULL;
751                 goto fail;
752         }
753
754         if (tty)
755                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
756                         goto fail;
757
758         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
759                 goto fail;
760
761         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
762                 goto fail;
763
764         close_session = true;
765
766         if ((!(e = pam_getenvlist(handle)))) {
767                 pam_code = PAM_BUF_ERR;
768                 goto fail;
769         }
770
771         /* Block SIGTERM, so that we know that it won't get lost in
772          * the child */
773         if (sigemptyset(&ss) < 0 ||
774             sigaddset(&ss, SIGTERM) < 0 ||
775             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
776                 goto fail;
777
778         parent_pid = getpid();
779
780         if ((pam_pid = fork()) < 0)
781                 goto fail;
782
783         if (pam_pid == 0) {
784                 int sig;
785                 int r = EXIT_PAM;
786
787                 /* The child's job is to reset the PAM session on
788                  * termination */
789
790                 /* This string must fit in 10 chars (i.e. the length
791                  * of "/sbin/init"), to look pretty in /bin/ps */
792                 rename_process("(sd-pam)");
793
794                 /* Make sure we don't keep open the passed fds in this
795                 child. We assume that otherwise only those fds are
796                 open here that have been opened by PAM. */
797                 close_many(fds, n_fds);
798
799                 /* Drop privileges - we don't need any to pam_close_session
800                  * and this will make PR_SET_PDEATHSIG work in most cases.
801                  * If this fails, ignore the error - but expect sd-pam threads
802                  * to fail to exit normally */
803                 if (setresuid(uid, uid, uid) < 0)
804                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
805
806                 /* Wait until our parent died. This will only work if
807                  * the above setresuid() succeeds, otherwise the kernel
808                  * will not allow unprivileged parents kill their privileged
809                  * children this way. We rely on the control groups kill logic
810                  * to do the rest for us. */
811                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
812                         goto child_finish;
813
814                 /* Check if our parent process might already have
815                  * died? */
816                 if (getppid() == parent_pid) {
817                         for (;;) {
818                                 if (sigwait(&ss, &sig) < 0) {
819                                         if (errno == EINTR)
820                                                 continue;
821
822                                         goto child_finish;
823                                 }
824
825                                 assert(sig == SIGTERM);
826                                 break;
827                         }
828                 }
829
830                 /* If our parent died we'll end the session */
831                 if (getppid() != parent_pid)
832                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
833                                 goto child_finish;
834
835                 r = 0;
836
837         child_finish:
838                 pam_end(handle, pam_code | PAM_DATA_SILENT);
839                 _exit(r);
840         }
841
842         /* If the child was forked off successfully it will do all the
843          * cleanups, so forget about the handle here. */
844         handle = NULL;
845
846         /* Unblock SIGTERM again in the parent */
847         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
848                 goto fail;
849
850         /* We close the log explicitly here, since the PAM modules
851          * might have opened it, but we don't want this fd around. */
852         closelog();
853
854         *pam_env = e;
855         e = NULL;
856
857         return 0;
858
859 fail:
860         if (pam_code != PAM_SUCCESS)
861                 err = -EPERM;  /* PAM errors do not map to errno */
862         else
863                 err = -errno;
864
865         if (handle) {
866                 if (close_session)
867                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
868
869                 pam_end(handle, pam_code | PAM_DATA_SILENT);
870         }
871
872         strv_free(e);
873
874         closelog();
875
876         if (pam_pid > 1) {
877                 kill(pam_pid, SIGTERM);
878                 kill(pam_pid, SIGCONT);
879         }
880
881         return err;
882 }
883 #endif
884
/* Renames the process to "(<name>)", where <name> is the file name
 * component of path, shortened to its last 8 characters if it is longer.
 * If the file name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char buf[11];
        const char *name;
        size_t len;

        /* The result must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        name = path_get_file_name(path);
        if (isempty(name)) {
                rename_process("(...)");
                return;
        }

        len = strlen(name);
        if (len > 8) {
                /* Keep the tail: it is usually the more interesting part,
                 * since the beginning might just be "systemd-" */
                name += len - 8;
                len = 8;
        }

        buf[0] = '(';
        memcpy(buf + 1, name, len);
        buf[len + 1] = ')';
        buf[len + 2] = 0;

        rename_process(buf);
}
915
916 static int apply_seccomp(uint32_t *syscall_filter) {
917         static const struct sock_filter header[] = {
918                 VALIDATE_ARCHITECTURE,
919                 EXAMINE_SYSCALL
920         };
921         static const struct sock_filter footer[] = {
922                 _KILL_PROCESS
923         };
924
925         int i;
926         unsigned n;
927         struct sock_filter *f;
928         struct sock_fprog prog;
929
930         assert(syscall_filter);
931
932         /* First: count the syscalls to check for */
933         for (i = 0, n = 0; i < syscall_max(); i++)
934                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
935                         n++;
936
937         /* Second: build the filter program from a header the syscall
938          * matches and the footer */
939         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
940         memcpy(f, header, sizeof(header));
941
942         for (i = 0, n = 0; i < syscall_max(); i++)
943                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
944                         struct sock_filter item[] = {
945                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
946                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
947                         };
948
949                         assert_cc(ELEMENTSOF(item) == 2);
950
951                         f[ELEMENTSOF(header) + 2*n]  = item[0];
952                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
953
954                         n++;
955                 }
956
957         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
958
959         /* Third: install the filter */
960         zero(prog);
961         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
962         prog.filter = f;
963         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
964                 return -errno;
965
966         return 0;
967 }
968
/* Forks a child process and executes command->path in it, set up as
 * described by context.
 *
 * Parameters:
 *   command            the command to run; command->path is executed and
 *                      command->exec_status is updated in the parent
 *   argv               argument vector; if NULL, command->argv is used
 *   context            the execution settings to apply in the child
 *   fds, n_fds         file descriptors to keep open for the child. If any
 *                      of stdin/stdout/stderr is configured as a socket,
 *                      exactly one fd must be passed and it is used as that
 *                      socket instead
 *   environment        base environment, merged with the variables generated
 *                      here, context->environment, the environment files and
 *                      the PAM environment
 *   apply_permissions  gates group/user switching, PAM, resource limits,
 *                      capabilities, secure bits, no-new-privileges and the
 *                      seccomp filter
 *   apply_chroot       if true chroot() to context->root_directory (when
 *                      set) and chdir() to the working directory; otherwise
 *                      only chdir() to root directory + "/" + working
 *                      directory
 *   apply_tty_stdin    passed on to setup_input()/setup_output()
 *   confirm_spawn      ask for confirmation before continuing in the child
 *   cgroup_bondings, cgroup_attributes, cgroup_suffix
 *                      cgroups to realize and to install the child into
 *   unit_id            used for logging and passed on to setup_output()
 *   idle_pipe          optional pipe pair; the child closes [1] and waits up
 *                      to IDLE_TIMEOUT_USEC for POLLHUP on [0]
 *   ret                receives the PID of the child on success
 *
 * Returns 0 in the parent on success and stores the child PID in *ret.
 * Returns a negative value if something fails before or at fork(): -EINVAL
 * for a socket setup with n_fds != 1, the error of
 * exec_context_load_environment() or cgroup_bonding_realize_list(), the
 * result of log_oom(), or -errno from fork().
 *
 * The child never returns from this function: it either replaces itself via
 * execve() or leaves through fail_child with _exit(r), where r is one of the
 * EXIT_* codes naming the step that failed. */
969 int exec_spawn(ExecCommand *command,
970                char **argv,
971                const ExecContext *context,
972                int fds[], unsigned n_fds,
973                char **environment,
974                bool apply_permissions,
975                bool apply_chroot,
976                bool apply_tty_stdin,
977                bool confirm_spawn,
978                CGroupBonding *cgroup_bondings,
979                CGroupAttribute *cgroup_attributes,
980                const char *cgroup_suffix,
981                const char *unit_id,
982                int idle_pipe[2],
983                pid_t *ret) {
984
985         pid_t pid;
986         int r;
987         char *line;
988         int socket_fd;
989         char _cleanup_strv_free_ **files_env = NULL;
990
991         assert(command);
992         assert(context);
993         assert(ret);
994         assert(fds || n_fds <= 0);
995
            /* If any standard stream is a socket, the single fd passed in
             * is that socket: remember it in socket_fd and pass no further
             * fds on to the child. */
996         if (context->std_input == EXEC_INPUT_SOCKET ||
997             context->std_output == EXEC_OUTPUT_SOCKET ||
998             context->std_error == EXEC_OUTPUT_SOCKET) {
999
1000                 if (n_fds != 1)
1001                         return -EINVAL;
1002
1003                 socket_fd = fds[0];
1004
1005                 fds = NULL;
1006                 n_fds = 0;
1007         } else
1008                 socket_fd = -1;
1009
1010         r = exec_context_load_environment(context, &files_env);
1011         if (r < 0) {
1012                 log_struct_unit(LOG_ERR,
1013                            unit_id,
1014                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1015                            "ERRNO=%d", -r,
1016                            NULL);
1017                 return r;
1018         }
1019
1020         if (!argv)
1021                 argv = command->argv;
1022
1023         line = exec_command_line(argv);
1024         if (!line)
1025                 return log_oom();
1026
1027         log_struct_unit(LOG_DEBUG,
1028                    unit_id,
1029                    "MESSAGE=About to execute %s", line,
1030                    NULL);
1031         free(line);
1032
1033         r = cgroup_bonding_realize_list(cgroup_bondings);
1034         if (r < 0)
1035                 return r;
1036
1037         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1038
1039         pid = fork();
1040         if (pid < 0)
1041                 return -errno;
1042
1043         if (pid == 0) {
1044                 int i, err;
1045                 sigset_t ss;
1046                 const char *username = NULL, *home = NULL;
1047                 uid_t uid = (uid_t) -1;
1048                 gid_t gid = (gid_t) -1;
1049                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1050                         **final_env = NULL, **final_argv = NULL;
1051                 unsigned n_env = 0;
1052                 bool set_access = false;
1053
1054                 /* child */
1055
1056                 rename_process_from_path(command->path);
1057
1058                 /* We reset exactly these signals, since they are the
1059                  * only ones we set to SIG_IGN in the main daemon. All
1060                  * others we leave untouched because we set them to
1061                  * SIG_DFL or a valid handler initially, both of which
1062                  * will be demoted to SIG_DFL. */
1063                 default_signals(SIGNALS_CRASH_HANDLER,
1064                                 SIGNALS_IGNORE, -1);
1065
1066                 if (context->ignore_sigpipe)
1067                         ignore_signals(SIGPIPE, -1);
1068
                     /* Start the child with an empty signal mask */
1069                 assert_se(sigemptyset(&ss) == 0);
1070                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1071                         err = -errno;
1072                         r = EXIT_SIGNAL_MASK;
1073                         goto fail_child;
1074                 }
1075
                     /* Close our copy of the write end, then wait (bounded
                      * by IDLE_TIMEOUT_USEC) for a hangup on the read end
                      * before going on. The result of the wait is ignored. */
1076                 if (idle_pipe) {
1077                         if (idle_pipe[1] >= 0)
1078                                 close_nointr_nofail(idle_pipe[1]);
1079                         if (idle_pipe[0] >= 0) {
1080                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1081                                 close_nointr_nofail(idle_pipe[0]);
1082                         }
1083                 }
1084
1085                 /* Close sockets very early to make sure we don't
1086                  * block init reexecution because it cannot bind its
1087                  * sockets */
1088                 log_forget_fds();
1089                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1090                                            socket_fd >= 0 ? 1 : n_fds);
1091                 if (err < 0) {
1092                         r = EXIT_FDS;
1093                         goto fail_child;
1094                 }
1095
1096                 if (!context->same_pgrp)
1097                         if (setsid() < 0) {
1098                                 err = -errno;
1099                                 r = EXIT_SETSID;
1100                                 goto fail_child;
1101                         }
1102
1103                 if (context->tcpwrap_name) {
1104                         if (socket_fd >= 0)
1105                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1106                                         err = -EACCES;
1107                                         r = EXIT_TCPWRAP;
1108                                         goto fail_child;
1109                                 }
1110
1111                         for (i = 0; i < (int) n_fds; i++) {
1112                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1113                                         err = -EACCES;
1114                                         r = EXIT_TCPWRAP;
1115                                         goto fail_child;
1116                                 }
1117                         }
1118                 }
1119
1120                 exec_context_tty_reset(context);
1121
1122                 if (confirm_spawn) {
1123                         char response;
1124
1125                         err = ask_for_confirmation(&response, argv);
1126                         if (err == -ETIMEDOUT)
1127                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1128                         else if (err < 0)
1129                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1130                         else if (response == 's') {
1131                                 write_confirm_message("Skipping execution.\n");
1132                                 err = -ECANCELED;
1133                                 r = EXIT_CONFIRM;
1134                                 goto fail_child;
1135                         } else if (response == 'n') {
1136                                 write_confirm_message("Failing execution.\n");
                                     /* r == 0 makes fail_child skip the error
                                      * log and _exit() with status 0 */
1137                                 err = r = 0;
1138                                 goto fail_child;
1139                         }
1140                 }
1141
1142                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1143                  * must be sure to drop O_NONBLOCK */
1144                 if (socket_fd >= 0)
1145                         fd_nonblock(socket_fd, false);
1146
1147                 err = setup_input(context, socket_fd, apply_tty_stdin);
1148                 if (err < 0) {
1149                         r = EXIT_STDIN;
1150                         goto fail_child;
1151                 }
1152
1153                 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1154                 if (err < 0) {
1155                         r = EXIT_STDOUT;
1156                         goto fail_child;
1157                 }
1158
1159                 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1160                 if (err < 0) {
1161                         r = EXIT_STDERR;
1162                         goto fail_child;
1163                 }
1164
1165                 if (cgroup_bondings) {
1166                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1167                         if (err < 0) {
1168                                 r = EXIT_CGROUP;
1169                                 goto fail_child;
1170                         }
1171                 }
1172
1173                 if (context->oom_score_adjust_set) {
1174                         char t[16];
1175
1176                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1177                         char_array_0(t);
1178
                             /* NOTE(review): the error is taken from errno
                              * rather than from the return value; confirm
                              * that write_one_line_file() leaves errno set
                              * on every failure path. */
1179                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1180                                 err = -errno;
1181                                 r = EXIT_OOM_ADJUST;
1182                                 goto fail_child;
1183                         }
1184                 }
1185
1186                 if (context->nice_set)
1187                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1188                                 err = -errno;
1189                                 r = EXIT_NICE;
1190                                 goto fail_child;
1191                         }
1192
1193                 if (context->cpu_sched_set) {
1194                         struct sched_param param;
1195
1196                         zero(param);
1197                         param.sched_priority = context->cpu_sched_priority;
1198
1199                         if (sched_setscheduler(0, context->cpu_sched_policy |
1200                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1201                                 err = -errno;
1202                                 r = EXIT_SETSCHEDULER;
1203                                 goto fail_child;
1204                         }
1205                 }
1206
1207                 if (context->cpuset)
1208                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1209                                 err = -errno;
1210                                 r = EXIT_CPUAFFINITY;
1211                                 goto fail_child;
1212                         }
1213
1214                 if (context->ioprio_set)
1215                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1216                                 err = -errno;
1217                                 r = EXIT_IOPRIO;
1218                                 goto fail_child;
1219                         }
1220
1221                 if (context->timer_slack_nsec != (nsec_t) -1)
1222                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1223                                 err = -errno;
1224                                 r = EXIT_TIMERSLACK;
1225                                 goto fail_child;
1226                         }
1227
1228                 if (context->utmp_id)
1229                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1230
                     /* Resolve the configured user now; the actual switch of
                      * identity happens further down in enforce_groups() and
                      * enforce_user(). */
1231                 if (context->user) {
1232                         username = context->user;
1233                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1234                         if (err < 0) {
1235                                 r = EXIT_USER;
1236                                 goto fail_child;
1237                         }
1238
1239                         if (is_terminal_input(context->std_input)) {
1240                                 err = chown_terminal(STDIN_FILENO, uid);
1241                                 if (err < 0) {
1242                                         r = EXIT_STDIN;
1243                                         goto fail_child;
1244                                 }
1245                         }
1246
1247                         if (cgroup_bondings && context->control_group_modify) {
1248                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1249                                 if (err >= 0)
1250                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1251                                 if (err < 0) {
1252                                         r = EXIT_CGROUP;
1253                                         goto fail_child;
1254                                 }
1255
1256                                 set_access = true;
1257                         }
1258                 }
1259
                     /* Access was not set above: still hand the persistency
                      * setting on, with mode, uid and gid all passed as -1.
                      * NOTE(review): the fourth argument is the gid in the
                      * call above but is cast to uid_t here; presumably
                      * harmless, confirm. */
1260                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1261                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1262                         if (err < 0) {
1263                                 r = EXIT_CGROUP;
1264                                 goto fail_child;
1265                         }
1266                 }
1267
1268                 if (apply_permissions) {
1269                         err = enforce_groups(context, username, gid);
1270                         if (err < 0) {
1271                                 r = EXIT_GROUP;
1272                                 goto fail_child;
1273                         }
1274                 }
1275
1276                 umask(context->umask);
1277
1278 #ifdef HAVE_PAM
1279                 if (apply_permissions && context->pam_name && username) {
1280                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1281                         if (err < 0) {
1282                                 r = EXIT_PAM;
1283                                 goto fail_child;
1284                         }
1285                 }
1286 #endif
1287                 if (context->private_network) {
1288                         if (unshare(CLONE_NEWNET) < 0) {
1289                                 err = -errno;
1290                                 r = EXIT_NETWORK;
1291                                 goto fail_child;
1292                         }
1293
1294                         loopback_setup();
1295                 }
1296
1297                 if (strv_length(context->read_write_dirs) > 0 ||
1298                     strv_length(context->read_only_dirs) > 0 ||
1299                     strv_length(context->inaccessible_dirs) > 0 ||
1300                     context->mount_flags != 0 ||
1301                     context->private_tmp) {
1302                         err = setup_namespace(context->read_write_dirs,
1303                                               context->read_only_dirs,
1304                                               context->inaccessible_dirs,
1305                                               context->private_tmp,
1306                                               context->mount_flags);
1307                         if (err < 0) {
1308                                 r = EXIT_NAMESPACE;
1309                                 goto fail_child;
1310                         }
1311                 }
1312
1313                 if (apply_chroot) {
1314                         if (context->root_directory)
1315                                 if (chroot(context->root_directory) < 0) {
1316                                         err = -errno;
1317                                         r = EXIT_CHROOT;
1318                                         goto fail_child;
1319                                 }
1320
1321                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1322                                 err = -errno;
1323                                 r = EXIT_CHDIR;
1324                                 goto fail_child;
1325                         }
1326                 } else {
                             /* No chroot(): change into the working directory
                              * as seen below the root directory instead */
1327                         char _cleanup_free_ *d = NULL;
1328
1329                         if (asprintf(&d, "%s/%s",
1330                                      context->root_directory ? context->root_directory : "",
1331                                      context->working_directory ? context->working_directory : "") < 0) {
1332                                 err = -ENOMEM;
1333                                 r = EXIT_MEMORY;
1334                                 goto fail_child;
1335                         }
1336
1337                         if (chdir(d) < 0) {
1338                                 err = -errno;
1339                                 r = EXIT_CHDIR;
1340                                 goto fail_child;
1341                         }
1342                 }
1343
1344                 /* We repeat the fd closing here, to make sure that
1345                  * nothing is leaked from the PAM modules */
1346                 err = close_all_fds(fds, n_fds);
1347                 if (err >= 0)
1348                         err = shift_fds(fds, n_fds);
1349                 if (err >= 0)
1350                         err = flags_fds(fds, n_fds, context->non_blocking);
1351                 if (err < 0) {
1352                         r = EXIT_FDS;
1353                         goto fail_child;
1354                 }
1355
1356                 if (apply_permissions) {
1357
1358                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1359                                 if (!context->rlimit[i])
1360                                         continue;
1361
1362                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1363                                         err = -errno;
1364                                         r = EXIT_LIMITS;
1365                                         goto fail_child;
1366                                 }
1367                         }
1368
1369                         if (context->capability_bounding_set_drop) {
1370                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1371                                 if (err < 0) {
1372                                         r = EXIT_CAPABILITIES;
1373                                         goto fail_child;
1374                                 }
1375                         }
1376
1377                         if (context->user) {
1378                                 err = enforce_user(context, uid);
1379                                 if (err < 0) {
1380                                         r = EXIT_USER;
1381                                         goto fail_child;
1382                                 }
1383                         }
1384
1385                         /* PR_GET_SECUREBITS is not privileged, while
1386                          * PR_SET_SECUREBITS is. So to suppress
1387                          * potential EPERMs we'll try not to call
1388                          * PR_SET_SECUREBITS unless necessary. */
1389                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1390                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1391                                         err = -errno;
1392                                         r = EXIT_SECUREBITS;
1393                                         goto fail_child;
1394                                 }
1395
1396                         if (context->capabilities)
1397                                 if (cap_set_proc(context->capabilities) < 0) {
1398                                         err = -errno;
1399                                         r = EXIT_CAPABILITIES;
1400                                         goto fail_child;
1401                                 }
1402
1403                         if (context->no_new_privileges)
1404                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1405                                         err = -errno;
1406                                         r = EXIT_NO_NEW_PRIVILEGES;
1407                                         goto fail_child;
1408                                 }
1409
1410                         if (context->syscall_filter) {
1411                                 err = apply_seccomp(context->syscall_filter);
1412                                 if (err < 0) {
1413                                         r = EXIT_SECCOMP;
1414                                         goto fail_child;
1415                                 }
1416                         }
1417                 }
1418
                     /* Seven slots: at most six entries are filled in below
                      * (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME, USER and the
                      * terminal entry), presumably leaving the last one as
                      * the terminating NULL. */
1419                 if (!(our_env = new0(char*, 7))) {
1420                         err = -ENOMEM;
1421                         r = EXIT_MEMORY;
1422                         goto fail_child;
1423                 }
1424
1425                 if (n_fds > 0)
1426                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1427                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1428                                 err = -ENOMEM;
1429                                 r = EXIT_MEMORY;
1430                                 goto fail_child;
1431                         }
1432
1433                 if (home)
1434                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1435                                 err = -ENOMEM;
1436                                 r = EXIT_MEMORY;
1437                                 goto fail_child;
1438                         }
1439
1440                 if (username)
1441                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1442                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1443                                 err = -ENOMEM;
1444                                 r = EXIT_MEMORY;
1445                                 goto fail_child;
1446                         }
1447
1448                 if (is_terminal_input(context->std_input) ||
1449                     context->std_output == EXEC_OUTPUT_TTY ||
1450                     context->std_error == EXEC_OUTPUT_TTY)
1451                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1452                                 err = -ENOMEM;
1453                                 r = EXIT_MEMORY;
1454                                 goto fail_child;
1455                         }
1456
1457                 assert(n_env <= 7);
1458
1459                 if (!(final_env = strv_env_merge(
1460                                       5,
1461                                       environment,
1462                                       our_env,
1463                                       context->environment,
1464                                       files_env,
1465                                       pam_env,
1466                                       NULL))) {
1467                         err = -ENOMEM;
1468                         r = EXIT_MEMORY;
1469                         goto fail_child;
1470                 }
1471
1472                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1473                         err = -ENOMEM;
1474                         r = EXIT_MEMORY;
1475                         goto fail_child;
1476                 }
1477
1478                 final_env = strv_env_clean(final_env);
1479
                     /* Execution only continues past execve() if it failed */
1480                 execve(command->path, final_argv, final_env);
1481                 err = -errno;
1482                 r = EXIT_EXEC;
1483
             /* Common exit path of the child: r is the exit status and err
              * the negative errno that gets logged when r is non-zero. */
1484         fail_child:
1485                 if (r != 0) {
1486                         log_open();
1487                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1488                                    "EXECUTABLE=%s", command->path,
1489                                    "MESSAGE=Failed at step %s spawning %s: %s",
1490                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1491                                           command->path, strerror(-err),
1492                                    "ERRNO=%d", -err,
1493                                    NULL);
1494                         log_close();
1495                 }
1496
1497                 _exit(r);
1498         }
1499
1500         log_struct_unit(LOG_DEBUG,
1501                    unit_id,
1502                    "MESSAGE=Forked %s as %lu",
1503                           command->path, (unsigned long) pid,
1504                    NULL);
1505
1506         /* We add the new process to the cgroup both in the child (so
1507          * that we can be sure that no user code is ever executed
1508          * outside of the cgroup) and in the parent (so that we can be
1509          * sure that when we kill the cgroup the process will be
1510          * killed too). */
1511         if (cgroup_bondings)
1512                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1513
1514         exec_status_start(&command->exec_status, pid);
1515
1516         *ret = pid;
1517         return 0;
1518 }
1519
1520 void exec_context_init(ExecContext *c) {
1521         assert(c);
1522
1523         c->umask = 0022;
1524         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1525         c->cpu_sched_policy = SCHED_OTHER;
1526         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1527         c->syslog_level_prefix = true;
1528         c->control_group_persistent = -1;
1529         c->ignore_sigpipe = true;
1530         c->timer_slack_nsec = (nsec_t) -1;
1531 }
1532
1533 void exec_context_done(ExecContext *c) {
1534         unsigned l;
1535
1536         assert(c);
1537
1538         strv_free(c->environment);
1539         c->environment = NULL;
1540
1541         strv_free(c->environment_files);
1542         c->environment_files = NULL;
1543
1544         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1545                 free(c->rlimit[l]);
1546                 c->rlimit[l] = NULL;
1547         }
1548
1549         free(c->working_directory);
1550         c->working_directory = NULL;
1551         free(c->root_directory);
1552         c->root_directory = NULL;
1553
1554         free(c->tty_path);
1555         c->tty_path = NULL;
1556
1557         free(c->tcpwrap_name);
1558         c->tcpwrap_name = NULL;
1559
1560         free(c->syslog_identifier);
1561         c->syslog_identifier = NULL;
1562
1563         free(c->user);
1564         c->user = NULL;
1565
1566         free(c->group);
1567         c->group = NULL;
1568
1569         strv_free(c->supplementary_groups);
1570         c->supplementary_groups = NULL;
1571
1572         free(c->pam_name);
1573         c->pam_name = NULL;
1574
1575         if (c->capabilities) {
1576                 cap_free(c->capabilities);
1577                 c->capabilities = NULL;
1578         }
1579
1580         strv_free(c->read_only_dirs);
1581         c->read_only_dirs = NULL;
1582
1583         strv_free(c->read_write_dirs);
1584         c->read_write_dirs = NULL;
1585
1586         strv_free(c->inaccessible_dirs);
1587         c->inaccessible_dirs = NULL;
1588
1589         if (c->cpuset)
1590                 CPU_FREE(c->cpuset);
1591
1592         free(c->utmp_id);
1593         c->utmp_id = NULL;
1594
1595         free(c->syscall_filter);
1596         c->syscall_filter = NULL;
1597 }
1598
1599 void exec_command_done(ExecCommand *c) {
1600         assert(c);
1601
1602         free(c->path);
1603         c->path = NULL;
1604
1605         strv_free(c->argv);
1606         c->argv = NULL;
1607 }
1608
1609 void exec_command_done_array(ExecCommand *c, unsigned n) {
1610         unsigned i;
1611
1612         for (i = 0; i < n; i++)
1613                 exec_command_done(c+i);
1614 }
1615
1616 void exec_command_free_list(ExecCommand *c) {
1617         ExecCommand *i;
1618
1619         while ((i = c)) {
1620                 LIST_REMOVE(ExecCommand, command, c, i);
1621                 exec_command_done(i);
1622                 free(i);
1623         }
1624 }
1625
1626 void exec_command_free_array(ExecCommand **c, unsigned n) {
1627         unsigned i;
1628
1629         for (i = 0; i < n; i++) {
1630                 exec_command_free_list(c[i]);
1631                 c[i] = NULL;
1632         }
1633 }
1634
1635 int exec_context_load_environment(const ExecContext *c, char ***l) {
1636         char **i, **r = NULL;
1637
1638         assert(c);
1639         assert(l);
1640
1641         STRV_FOREACH(i, c->environment_files) {
1642                 char *fn;
1643                 int k;
1644                 bool ignore = false;
1645                 char **p;
1646                 glob_t pglob;
1647                 int count, n;
1648
1649                 fn = *i;
1650
1651                 if (fn[0] == '-') {
1652                         ignore = true;
1653                         fn ++;
1654                 }
1655
1656                 if (!path_is_absolute(fn)) {
1657
1658                         if (ignore)
1659                                 continue;
1660
1661                         strv_free(r);
1662                         return -EINVAL;
1663                 }
1664
1665                 /* Filename supports globbing, take all matching files */
1666                 zero(pglob);
1667                 errno = 0;
1668                 if (glob(fn, 0, NULL, &pglob) != 0) {
1669                         globfree(&pglob);
1670                         if (ignore)
1671                                 continue;
1672
1673                         strv_free(r);
1674                         return errno ? -errno : -EINVAL;
1675                 }
1676                 count = pglob.gl_pathc;
1677                 if (count == 0) {
1678                         globfree(&pglob);
1679                         if (ignore)
1680                                 continue;
1681
1682                         strv_free(r);
1683                         return -EINVAL;
1684                 }
1685                 for (n = 0; n < count; n++) {
1686                         k = load_env_file(pglob.gl_pathv[n], &p);
1687                         if (k < 0) {
1688                                 if (ignore)
1689                                         continue;
1690
1691                                 strv_free(r);
1692                                 globfree(&pglob);
1693                                 return k;
1694                          }
1695
1696                         if (r == NULL)
1697                                 r = p;
1698                         else {
1699                                 char **m;
1700
1701                                 m = strv_env_merge(2, r, p);
1702                                 strv_free(r);
1703                                 strv_free(p);
1704
1705                                 if (!m) {
1706                                         globfree(&pglob);
1707                                         return -ENOMEM;
1708                                 }
1709
1710                                 r = m;
1711                         }
1712                 }
1713                 globfree(&pglob);
1714         }
1715
1716         *l = r;
1717
1718         return 0;
1719 }
1720
/*
 * Tells whether the terminal named by "tty" could be the same device as
 * /dev/console. The answer errs on the side of "yes": when the console
 * cannot be resolved, true is returned.
 */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL;
        const char *resolved;
        bool may_match;

        /* Compare bare device names: drop a leading "/dev/" if there is one */
        if (startswith(tty, "/dev/"))
                tty += strlen("/dev/");

        /* The console trivially matches itself */
        if (streq(tty, "console"))
                return true;

        /* Without a resolved console we cannot rule a match out */
        resolved = resolve_dev_console(&active);
        if (!resolved)
                return true;

        if (streq(resolved, tty))
                may_match = true;
        else
                /* "tty0" stands for the active VC, so any VC may be the
                 * console at some point */
                may_match = streq(resolved, "tty0") && tty_is_vc(tty);

        /* Release the buffer handed back through "active" only after the
         * comparisons above; presumably "resolved" points into it */
        free(active);

        return may_match;
}
1743
1744 bool exec_context_may_touch_console(ExecContext *ec) {
1745         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1746                 is_terminal_input(ec->std_input) ||
1747                 is_terminal_output(ec->std_output) ||
1748                 is_terminal_output(ec->std_error)) &&
1749                tty_may_match_dev_console(tty_path(ec));
1750 }
1751
1752 static void strv_fprintf(FILE *f, char **l) {
1753         char **g;
1754
1755         assert(f);
1756
1757         STRV_FOREACH(g, l)
1758                 fprintf(f, " %s", *g);
1759 }
1760
1761 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1762         char ** e;
1763         unsigned i;
1764
1765         assert(c);
1766         assert(f);
1767
1768         if (!prefix)
1769                 prefix = "";
1770
1771         fprintf(f,
1772                 "%sUMask: %04o\n"
1773                 "%sWorkingDirectory: %s\n"
1774                 "%sRootDirectory: %s\n"
1775                 "%sNonBlocking: %s\n"
1776                 "%sPrivateTmp: %s\n"
1777                 "%sControlGroupModify: %s\n"
1778                 "%sControlGroupPersistent: %s\n"
1779                 "%sPrivateNetwork: %s\n"
1780                 "%sIgnoreSIGPIPE: %s\n",
1781                 prefix, c->umask,
1782                 prefix, c->working_directory ? c->working_directory : "/",
1783                 prefix, c->root_directory ? c->root_directory : "/",
1784                 prefix, yes_no(c->non_blocking),
1785                 prefix, yes_no(c->private_tmp),
1786                 prefix, yes_no(c->control_group_modify),
1787                 prefix, yes_no(c->control_group_persistent),
1788                 prefix, yes_no(c->private_network),
1789                 prefix, yes_no(c->ignore_sigpipe));
1790
1791         STRV_FOREACH(e, c->environment)
1792                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1793
1794         STRV_FOREACH(e, c->environment_files)
1795                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1796
1797         if (c->tcpwrap_name)
1798                 fprintf(f,
1799                         "%sTCPWrapName: %s\n",
1800                         prefix, c->tcpwrap_name);
1801
1802         if (c->nice_set)
1803                 fprintf(f,
1804                         "%sNice: %i\n",
1805                         prefix, c->nice);
1806
1807         if (c->oom_score_adjust_set)
1808                 fprintf(f,
1809                         "%sOOMScoreAdjust: %i\n",
1810                         prefix, c->oom_score_adjust);
1811
1812         for (i = 0; i < RLIM_NLIMITS; i++)
1813                 if (c->rlimit[i])
1814                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1815
1816         if (c->ioprio_set) {
1817                 char *class_str;
1818                 int r;
1819
1820                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1821                 if (r < 0)
1822                         class_str = NULL;
1823                 fprintf(f,
1824                         "%sIOSchedulingClass: %s\n"
1825                         "%sIOPriority: %i\n",
1826                         prefix, strna(class_str),
1827                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1828                 free(class_str);
1829         }
1830
1831         if (c->cpu_sched_set) {
1832                 char *policy_str;
1833                 int r;
1834
1835                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1836                 if (r < 0)
1837                         policy_str = NULL;
1838                 fprintf(f,
1839                         "%sCPUSchedulingPolicy: %s\n"
1840                         "%sCPUSchedulingPriority: %i\n"
1841                         "%sCPUSchedulingResetOnFork: %s\n",
1842                         prefix, strna(policy_str),
1843                         prefix, c->cpu_sched_priority,
1844                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1845                 free(policy_str);
1846         }
1847
1848         if (c->cpuset) {
1849                 fprintf(f, "%sCPUAffinity:", prefix);
1850                 for (i = 0; i < c->cpuset_ncpus; i++)
1851                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1852                                 fprintf(f, " %i", i);
1853                 fputs("\n", f);
1854         }
1855
1856         if (c->timer_slack_nsec != (nsec_t) -1)
1857                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1858
1859         fprintf(f,
1860                 "%sStandardInput: %s\n"
1861                 "%sStandardOutput: %s\n"
1862                 "%sStandardError: %s\n",
1863                 prefix, exec_input_to_string(c->std_input),
1864                 prefix, exec_output_to_string(c->std_output),
1865                 prefix, exec_output_to_string(c->std_error));
1866
1867         if (c->tty_path)
1868                 fprintf(f,
1869                         "%sTTYPath: %s\n"
1870                         "%sTTYReset: %s\n"
1871                         "%sTTYVHangup: %s\n"
1872                         "%sTTYVTDisallocate: %s\n",
1873                         prefix, c->tty_path,
1874                         prefix, yes_no(c->tty_reset),
1875                         prefix, yes_no(c->tty_vhangup),
1876                         prefix, yes_no(c->tty_vt_disallocate));
1877
1878         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1879             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1880             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1881             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1882                 char *fac_str, *lvl_str;
1883                 int r;
1884
1885                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1886                 if (r < 0)
1887                         fac_str = NULL;
1888
1889                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1890                 if (r < 0)
1891                         lvl_str = NULL;
1892
1893                 fprintf(f,
1894                         "%sSyslogFacility: %s\n"
1895                         "%sSyslogLevel: %s\n",
1896                         prefix, strna(fac_str),
1897                         prefix, strna(lvl_str));
1898                 free(lvl_str);
1899                 free(fac_str);
1900         }
1901
1902         if (c->capabilities) {
1903                 char *t;
1904                 if ((t = cap_to_text(c->capabilities, NULL))) {
1905                         fprintf(f, "%sCapabilities: %s\n",
1906                                 prefix, t);
1907                         cap_free(t);
1908                 }
1909         }
1910
1911         if (c->secure_bits)
1912                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1913                         prefix,
1914                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1915                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1916                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1917                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1918                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1919                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1920
1921         if (c->capability_bounding_set_drop) {
1922                 unsigned long l;
1923                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1924
1925                 for (l = 0; l <= cap_last_cap(); l++)
1926                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1927                                 char *t;
1928
1929                                 if ((t = cap_to_name(l))) {
1930                                         fprintf(f, " %s", t);
1931                                         cap_free(t);
1932                                 }
1933                         }
1934
1935                 fputs("\n", f);
1936         }
1937
1938         if (c->user)
1939                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1940         if (c->group)
1941                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1942
1943         if (strv_length(c->supplementary_groups) > 0) {
1944                 fprintf(f, "%sSupplementaryGroups:", prefix);
1945                 strv_fprintf(f, c->supplementary_groups);
1946                 fputs("\n", f);
1947         }
1948
1949         if (c->pam_name)
1950                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1951
1952         if (strv_length(c->read_write_dirs) > 0) {
1953                 fprintf(f, "%sReadWriteDirs:", prefix);
1954                 strv_fprintf(f, c->read_write_dirs);
1955                 fputs("\n", f);
1956         }
1957
1958         if (strv_length(c->read_only_dirs) > 0) {
1959                 fprintf(f, "%sReadOnlyDirs:", prefix);
1960                 strv_fprintf(f, c->read_only_dirs);
1961                 fputs("\n", f);
1962         }
1963
1964         if (strv_length(c->inaccessible_dirs) > 0) {
1965                 fprintf(f, "%sInaccessibleDirs:", prefix);
1966                 strv_fprintf(f, c->inaccessible_dirs);
1967                 fputs("\n", f);
1968         }
1969
1970         if (c->utmp_id)
1971                 fprintf(f,
1972                         "%sUtmpIdentifier: %s\n",
1973                         prefix, c->utmp_id);
1974 }
1975
1976 void exec_status_start(ExecStatus *s, pid_t pid) {
1977         assert(s);
1978
1979         zero(*s);
1980         s->pid = pid;
1981         dual_timestamp_get(&s->start_timestamp);
1982 }
1983
1984 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1985         assert(s);
1986
1987         if (s->pid && s->pid != pid)
1988                 zero(*s);
1989
1990         s->pid = pid;
1991         dual_timestamp_get(&s->exit_timestamp);
1992
1993         s->code = code;
1994         s->status = status;
1995
1996         if (context) {
1997                 if (context->utmp_id)
1998                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1999
2000                 exec_context_tty_reset(context);
2001         }
2002 }
2003
2004 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2005         char buf[FORMAT_TIMESTAMP_MAX];
2006
2007         assert(s);
2008         assert(f);
2009
2010         if (!prefix)
2011                 prefix = "";
2012
2013         if (s->pid <= 0)
2014                 return;
2015
2016         fprintf(f,
2017                 "%sPID: %lu\n",
2018                 prefix, (unsigned long) s->pid);
2019
2020         if (s->start_timestamp.realtime > 0)
2021                 fprintf(f,
2022                         "%sStart Timestamp: %s\n",
2023                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2024
2025         if (s->exit_timestamp.realtime > 0)
2026                 fprintf(f,
2027                         "%sExit Timestamp: %s\n"
2028                         "%sExit Code: %s\n"
2029                         "%sExit Status: %i\n",
2030                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2031                         prefix, sigchld_code_to_string(s->code),
2032                         prefix, s->status);
2033 }
2034
2035 char *exec_command_line(char **argv) {
2036         size_t k;
2037         char *n, *p, **a;
2038         bool first = true;
2039
2040         assert(argv);
2041
2042         k = 1;
2043         STRV_FOREACH(a, argv)
2044                 k += strlen(*a)+3;
2045
2046         if (!(n = new(char, k)))
2047                 return NULL;
2048
2049         p = n;
2050         STRV_FOREACH(a, argv) {
2051
2052                 if (!first)
2053                         *(p++) = ' ';
2054                 else
2055                         first = false;
2056
2057                 if (strpbrk(*a, WHITESPACE)) {
2058                         *(p++) = '\'';
2059                         p = stpcpy(p, *a);
2060                         *(p++) = '\'';
2061                 } else
2062                         p = stpcpy(p, *a);
2063
2064         }
2065
2066         *p = 0;
2067
2068         /* FIXME: this doesn't really handle arguments that have
2069          * spaces and ticks in them */
2070
2071         return n;
2072 }
2073
2074 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2075         char *p2;
2076         const char *prefix2;
2077
2078         char *cmd;
2079
2080         assert(c);
2081         assert(f);
2082
2083         if (!prefix)
2084                 prefix = "";
2085         p2 = strappend(prefix, "\t");
2086         prefix2 = p2 ? p2 : prefix;
2087
2088         cmd = exec_command_line(c->argv);
2089
2090         fprintf(f,
2091                 "%sCommand Line: %s\n",
2092                 prefix, cmd ? cmd : strerror(ENOMEM));
2093
2094         free(cmd);
2095
2096         exec_status_dump(&c->exec_status, f, prefix2);
2097
2098         free(p2);
2099 }
2100
2101 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2102         assert(f);
2103
2104         if (!prefix)
2105                 prefix = "";
2106
2107         LIST_FOREACH(command, c, c)
2108                 exec_command_dump(c, f, prefix);
2109 }
2110
2111 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2112         ExecCommand *end;
2113
2114         assert(l);
2115         assert(e);
2116
2117         if (*l) {
2118                 /* It's kind of important, that we keep the order here */
2119                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2120                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2121         } else
2122               *l = e;
2123 }
2124
2125 int exec_command_set(ExecCommand *c, const char *path, ...) {
2126         va_list ap;
2127         char **l, *p;
2128
2129         assert(c);
2130         assert(path);
2131
2132         va_start(ap, path);
2133         l = strv_new_ap(path, ap);
2134         va_end(ap);
2135
2136         if (!l)
2137                 return -ENOMEM;
2138
2139         if (!(p = strdup(path))) {
2140                 strv_free(l);
2141                 return -ENOMEM;
2142         }
2143
2144         free(c->path);
2145         c->path = p;
2146
2147         strv_free(c->argv);
2148         c->argv = l;
2149
2150         return 0;
2151 }
2152
2153 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2154         [EXEC_INPUT_NULL] = "null",
2155         [EXEC_INPUT_TTY] = "tty",
2156         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2157         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2158         [EXEC_INPUT_SOCKET] = "socket"
2159 };
2160
2161 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2162
2163 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2164         [EXEC_OUTPUT_INHERIT] = "inherit",
2165         [EXEC_OUTPUT_NULL] = "null",
2166         [EXEC_OUTPUT_TTY] = "tty",
2167         [EXEC_OUTPUT_SYSLOG] = "syslog",
2168         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2169         [EXEC_OUTPUT_KMSG] = "kmsg",
2170         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2171         [EXEC_OUTPUT_JOURNAL] = "journal",
2172         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2173         [EXEC_OUTPUT_SOCKET] = "socket"
2174 };
2175
2176 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);