chiark / gitweb /
execute: increase severity of journal connect failure message
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71
72 /* This assumes there is a 'tty' group */
73 #define TTY_MODE 0620
74
/* Moves the passed file descriptors so that fds[i] ends up at number
 * i+3, i.e. directly after stdin/stdout/stderr.
 *
 * The array is updated in place with the new descriptor numbers.
 * Returns 0 on success or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx, retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied, so we
                         * got a higher number. Remember the first such
                         * index and make another pass starting there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (first >= 0);

        return 0;
}
118
/* Applies the requested O_NONBLOCK setting to every descriptor in fds
 * and clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        end = fds + n_fds;
        for (cur = fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be passed on to the child. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky: we already got the number we wanted */
        if (null_fd == nfd)
                return nfd;

        r = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(null_fd);

        return r;
}
184
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
186         int fd, r;
187         union sockaddr_union sa;
188
189         assert(context);
190         assert(output < _EXEC_OUTPUT_MAX);
191         assert(ident);
192         assert(nfd >= 0);
193
194         fd = socket(AF_UNIX, SOCK_STREAM, 0);
195         if (fd < 0)
196                 return -errno;
197
198         zero(sa);
199         sa.un.sun_family = AF_UNIX;
200         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
201
202         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
203         if (r < 0) {
204                 close_nointr_nofail(fd);
205                 return -errno;
206         }
207
208         if (shutdown(fd, SHUT_RD) < 0) {
209                 close_nointr_nofail(fd);
210                 return -errno;
211         }
212
213         dprintf(fd,
214                 "%s\n"
215                 "%s\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n"
219                 "%i\n"
220                 "%i\n",
221                 context->syslog_identifier ? context->syslog_identifier : ident,
222                 unit_id,
223                 context->syslog_priority,
224                 !!context->syslog_level_prefix,
225                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
228
229         if (fd != nfd) {
230                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231                 close_nointr_nofail(fd);
232         } else
233                 r = nfd;
234
235         return r;
236 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd.
 *
 * Returns nfd on success, a negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested number, nothing to move */
        if (tty_fd == nfd)
                return nfd;

        r = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(tty_fd);

        return r;
}
254
255 static bool is_terminal_input(ExecInput i) {
256         return
257                 i == EXEC_INPUT_TTY ||
258                 i == EXEC_INPUT_TTY_FORCE ||
259                 i == EXEC_INPUT_TTY_FAIL;
260 }
261
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
263
264         if (is_terminal_input(std_input) && !apply_tty_stdin)
265                 return EXEC_INPUT_NULL;
266
267         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268                 return EXEC_INPUT_NULL;
269
270         return std_input;
271 }
272
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
274
275         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276                 return EXEC_OUTPUT_INHERIT;
277
278         return std_output;
279 }
280
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
282         ExecInput i;
283
284         assert(context);
285
286         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
287
288         switch (i) {
289
290         case EXEC_INPUT_NULL:
291                 return open_null_as(O_RDONLY, STDIN_FILENO);
292
293         case EXEC_INPUT_TTY:
294         case EXEC_INPUT_TTY_FORCE:
295         case EXEC_INPUT_TTY_FAIL: {
296                 int fd, r;
297
298                 if ((fd = acquire_terminal(
299                                      tty_path(context),
300                                      i == EXEC_INPUT_TTY_FAIL,
301                                      i == EXEC_INPUT_TTY_FORCE,
302                                      false,
303                                      (usec_t) -1)) < 0)
304                         return fd;
305
306                 if (fd != STDIN_FILENO) {
307                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308                         close_nointr_nofail(fd);
309                 } else
310                         r = STDIN_FILENO;
311
312                 return r;
313         }
314
315         case EXEC_INPUT_SOCKET:
316                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
317
318         default:
319                 assert_not_reached("Unknown input type");
320         }
321 }
322
323 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
324         ExecOutput o;
325         ExecInput i;
326         int r;
327
328         assert(context);
329         assert(ident);
330
331         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332         o = fixup_output(context->std_output, socket_fd);
333
334         if (fileno == STDERR_FILENO) {
335                 ExecOutput e;
336                 e = fixup_output(context->std_error, socket_fd);
337
338                 /* This expects the input and output are already set up */
339
340                 /* Don't change the stderr file descriptor if we inherit all
341                  * the way and are not on a tty */
342                 if (e == EXEC_OUTPUT_INHERIT &&
343                     o == EXEC_OUTPUT_INHERIT &&
344                     i == EXEC_INPUT_NULL &&
345                     !is_terminal_input(context->std_input) &&
346                     getppid () != 1)
347                         return fileno;
348
349                 /* Duplicate from stdout if possible */
350                 if (e == o || e == EXEC_OUTPUT_INHERIT)
351                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
352
353                 o = e;
354
355         } else if (o == EXEC_OUTPUT_INHERIT) {
356                 /* If input got downgraded, inherit the original value */
357                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
358                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
359
360                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
361                 if (i != EXEC_INPUT_NULL)
362                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
363
364                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
365                 if (getppid() != 1)
366                         return fileno;
367
368                 /* We need to open /dev/null here anew, to get the right access mode. */
369                 return open_null_as(O_WRONLY, fileno);
370         }
371
372         switch (o) {
373
374         case EXEC_OUTPUT_NULL:
375                 return open_null_as(O_WRONLY, fileno);
376
377         case EXEC_OUTPUT_TTY:
378                 if (is_terminal_input(i))
379                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
380
381                 /* We don't reset the terminal if this is just about output */
382                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
383
384         case EXEC_OUTPUT_SYSLOG:
385         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
386         case EXEC_OUTPUT_KMSG:
387         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
388         case EXEC_OUTPUT_JOURNAL:
389         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
390                 r = connect_logger_as(context, o, ident, unit_id, fileno);
391                 if (r < 0) {
392                         log_struct_unit(LOG_CRIT, unit_id,
393                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
394                                 fileno == STDOUT_FILENO ? "out" : "err",
395                                 unit_id, strerror(-r),
396                                 "ERRNO=%d", -r,
397                                 NULL);
398                         r = open_null_as(O_WRONLY, fileno);
399                 }
400                 return r;
401
402         case EXEC_OUTPUT_SOCKET:
403                 assert(socket_fd >= 0);
404                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
405
406         default:
407                 assert_not_reached("Unknown error type");
408         }
409 }
410
411 static int chown_terminal(int fd, uid_t uid) {
412         struct stat st;
413
414         assert(fd >= 0);
415
416         /* This might fail. What matters are the results. */
417         (void) fchown(fd, uid, -1);
418         (void) fchmod(fd, TTY_MODE);
419
420         if (fstat(fd, &st) < 0)
421                 return -errno;
422
423         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
424                 return -EPERM;
425
426         return 0;
427 }
428
429 static int setup_confirm_stdio(int *_saved_stdin,
430                                int *_saved_stdout) {
431         int fd = -1, saved_stdin, saved_stdout = -1, r;
432
433         assert(_saved_stdin);
434         assert(_saved_stdout);
435
436         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
437         if (saved_stdin < 0)
438                 return -errno;
439
440         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
441         if (saved_stdout < 0) {
442                 r = errno;
443                 goto fail;
444         }
445
446         fd = acquire_terminal(
447                         "/dev/console",
448                         false,
449                         false,
450                         false,
451                         DEFAULT_CONFIRM_USEC);
452         if (fd < 0) {
453                 r = fd;
454                 goto fail;
455         }
456
457         r = chown_terminal(fd, getuid());
458         if (r < 0)
459                 goto fail;
460
461         if (dup2(fd, STDIN_FILENO) < 0) {
462                 r = -errno;
463                 goto fail;
464         }
465
466         if (dup2(fd, STDOUT_FILENO) < 0) {
467                 r = -errno;
468                 goto fail;
469         }
470
471         if (fd >= 2)
472                 close_nointr_nofail(fd);
473
474         *_saved_stdin = saved_stdin;
475         *_saved_stdout = saved_stdout;
476
477         return 0;
478
479 fail:
480         if (saved_stdout >= 0)
481                 close_nointr_nofail(saved_stdout);
482
483         if (saved_stdin >= 0)
484                 close_nointr_nofail(saved_stdin);
485
486         if (fd >= 0)
487                 close_nointr_nofail(fd);
488
489         return r;
490 }
491
492 static int write_confirm_message(const char *format, ...) {
493         int fd;
494         va_list ap;
495
496         assert(format);
497
498         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
499         if (fd < 0)
500                 return fd;
501
502         va_start(ap, format);
503         vdprintf(fd, format, ap);
504         va_end(ap);
505
506         close_nointr_nofail(fd);
507
508         return 0;
509 }
510
/* Releases the terminal and puts the saved descriptors back onto stdin
 * and stdout, then closes the saved duplicates. Descriptors that are
 * negative are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        /* First put both descriptors back in place ... */
        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... then get rid of the duplicates */
        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return result;
}
537
/* Asks on the console whether the command line in argv shall be
 * executed. The answer character (one of "yns") is stored in
 * *response by ask().
 *
 * stdin/stdout are switched to the console for the duration of the
 * question and restored afterwards, also when building the command
 * line fails. Returns the result of ask(), or a negative error if the
 * console could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave the console on stdin/stdout and don't
                 * leak the saved descriptors. */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
557
558 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
559         bool keep_groups = false;
560         int r;
561
562         assert(context);
563
564         /* Lookup and set GID and supplementary group list. Here too
565          * we avoid NSS lookups for gid=0. */
566
567         if (context->group || username) {
568
569                 if (context->group) {
570                         const char *g = context->group;
571
572                         if ((r = get_group_creds(&g, &gid)) < 0)
573                                 return r;
574                 }
575
576                 /* First step, initialize groups from /etc/groups */
577                 if (username && gid != 0) {
578                         if (initgroups(username, gid) < 0)
579                                 return -errno;
580
581                         keep_groups = true;
582                 }
583
584                 /* Second step, set our gids */
585                 if (setresgid(gid, gid, gid) < 0)
586                         return -errno;
587         }
588
589         if (context->supplementary_groups) {
590                 int ngroups_max, k;
591                 gid_t *gids;
592                 char **i;
593
594                 /* Final step, initialize any manually set supplementary groups */
595                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
596
597                 if (!(gids = new(gid_t, ngroups_max)))
598                         return -ENOMEM;
599
600                 if (keep_groups) {
601                         if ((k = getgroups(ngroups_max, gids)) < 0) {
602                                 free(gids);
603                                 return -errno;
604                         }
605                 } else
606                         k = 0;
607
608                 STRV_FOREACH(i, context->supplementary_groups) {
609                         const char *g;
610
611                         if (k >= ngroups_max) {
612                                 free(gids);
613                                 return -E2BIG;
614                         }
615
616                         g = *i;
617                         r = get_group_creds(&g, gids+k);
618                         if (r < 0) {
619                                 free(gids);
620                                 return r;
621                         }
622
623                         k++;
624                 }
625
626                 if (setgroups(k, gids) < 0) {
627                         free(gids);
628                         return -errno;
629                 }
630
631                 free(gids);
632         }
633
634         return 0;
635 }
636
637 static int enforce_user(const ExecContext *context, uid_t uid) {
638         int r;
639         assert(context);
640
641         /* Sets (but doesn't lookup) the uid and make sure we keep the
642          * capabilities while doing so. */
643
644         if (context->capabilities) {
645                 cap_t d;
646                 static const cap_value_t bits[] = {
647                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
648                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
649                 };
650
651                 /* First step: If we need to keep capabilities but
652                  * drop privileges we need to make sure we keep our
653                  * caps, whiel we drop privileges. */
654                 if (uid != 0) {
655                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
656
657                         if (prctl(PR_GET_SECUREBITS) != sb)
658                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
659                                         return -errno;
660                 }
661
662                 /* Second step: set the capabilities. This will reduce
663                  * the capabilities to the minimum we need. */
664
665                 if (!(d = cap_dup(context->capabilities)))
666                         return -errno;
667
668                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
669                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
670                         r = -errno;
671                         cap_free(d);
672                         return r;
673                 }
674
675                 if (cap_set_proc(d) < 0) {
676                         r = -errno;
677                         cap_free(d);
678                         return r;
679                 }
680
681                 cap_free(d);
682         }
683
684         /* Third step: actually set the uids */
685         if (setresuid(uid, uid, uid) < 0)
686                 return -errno;
687
688         /* At this point we should have all necessary capabilities but
689            are otherwise a normal user. However, the caps might got
690            corrupted due to the setresuid() so we need clean them up
691            later. This is done outside of this call. */
692
693         return 0;
694 }
695
696 #ifdef HAVE_PAM
697
698 static int null_conv(
699                 int num_msg,
700                 const struct pam_message **msg,
701                 struct pam_response **resp,
702                 void *appdata_ptr) {
703
704         /* We don't support conversations */
705
706         return PAM_CONV_ERR;
707 }
708
709 static int setup_pam(
710                 const char *name,
711                 const char *user,
712                 uid_t uid,
713                 const char *tty,
714                 char ***pam_env,
715                 int fds[], unsigned n_fds) {
716
717         static const struct pam_conv conv = {
718                 .conv = null_conv,
719                 .appdata_ptr = NULL
720         };
721
722         pam_handle_t *handle = NULL;
723         sigset_t ss, old_ss;
724         int pam_code = PAM_SUCCESS;
725         int err;
726         char **e = NULL;
727         bool close_session = false;
728         pid_t pam_pid = 0, parent_pid;
729
730         assert(name);
731         assert(user);
732         assert(pam_env);
733
734         /* We set up PAM in the parent process, then fork. The child
735          * will then stay around until killed via PR_GET_PDEATHSIG or
736          * systemd via the cgroup logic. It will then remove the PAM
737          * session again. The parent process will exec() the actual
738          * daemon. We do things this way to ensure that the main PID
739          * of the daemon is the one we initially fork()ed. */
740
741         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
742                 handle = NULL;
743                 goto fail;
744         }
745
746         if (tty)
747                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
748                         goto fail;
749
750         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
751                 goto fail;
752
753         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
754                 goto fail;
755
756         close_session = true;
757
758         if ((!(e = pam_getenvlist(handle)))) {
759                 pam_code = PAM_BUF_ERR;
760                 goto fail;
761         }
762
763         /* Block SIGTERM, so that we know that it won't get lost in
764          * the child */
765         if (sigemptyset(&ss) < 0 ||
766             sigaddset(&ss, SIGTERM) < 0 ||
767             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
768                 goto fail;
769
770         parent_pid = getpid();
771
772         if ((pam_pid = fork()) < 0)
773                 goto fail;
774
775         if (pam_pid == 0) {
776                 int sig;
777                 int r = EXIT_PAM;
778
779                 /* The child's job is to reset the PAM session on
780                  * termination */
781
782                 /* This string must fit in 10 chars (i.e. the length
783                  * of "/sbin/init"), to look pretty in /bin/ps */
784                 rename_process("(sd-pam)");
785
786                 /* Make sure we don't keep open the passed fds in this
787                 child. We assume that otherwise only those fds are
788                 open here that have been opened by PAM. */
789                 close_many(fds, n_fds);
790
791                 /* Drop privileges - we don't need any to pam_close_session
792                  * and this will make PR_SET_PDEATHSIG work in most cases.
793                  * If this fails, ignore the error - but expect sd-pam threads
794                  * to fail to exit normally */
795                 if (setresuid(uid, uid, uid) < 0)
796                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
797
798                 /* Wait until our parent died. This will only work if
799                  * the above setresuid() succeeds, otherwise the kernel
800                  * will not allow unprivileged parents kill their privileged
801                  * children this way. We rely on the control groups kill logic
802                  * to do the rest for us. */
803                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
804                         goto child_finish;
805
806                 /* Check if our parent process might already have
807                  * died? */
808                 if (getppid() == parent_pid) {
809                         for (;;) {
810                                 if (sigwait(&ss, &sig) < 0) {
811                                         if (errno == EINTR)
812                                                 continue;
813
814                                         goto child_finish;
815                                 }
816
817                                 assert(sig == SIGTERM);
818                                 break;
819                         }
820                 }
821
822                 /* If our parent died we'll end the session */
823                 if (getppid() != parent_pid)
824                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
825                                 goto child_finish;
826
827                 r = 0;
828
829         child_finish:
830                 pam_end(handle, pam_code | PAM_DATA_SILENT);
831                 _exit(r);
832         }
833
834         /* If the child was forked off successfully it will do all the
835          * cleanups, so forget about the handle here. */
836         handle = NULL;
837
838         /* Unblock SIGTERM again in the parent */
839         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
840                 goto fail;
841
842         /* We close the log explicitly here, since the PAM modules
843          * might have opened it, but we don't want this fd around. */
844         closelog();
845
846         *pam_env = e;
847         e = NULL;
848
849         return 0;
850
851 fail:
852         if (pam_code != PAM_SUCCESS)
853                 err = -EPERM;  /* PAM errors do not map to errno */
854         else
855                 err = -errno;
856
857         if (handle) {
858                 if (close_session)
859                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
860
861                 pam_end(handle, pam_code | PAM_DATA_SILENT);
862         }
863
864         strv_free(e);
865
866         closelog();
867
868         if (pam_pid > 1) {
869                 kill(pam_pid, SIGTERM);
870                 kill(pam_pid, SIGCONT);
871         }
872
873         return err;
874 }
875 #endif
876
/* Renames the current process to "(<name>)", where <name> is the file
 * name component of path, cut down to its last 8 characters. If the
 * file name is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        /* Up to 8 name characters, two parentheses and the trailing
         * NUL: the result fits in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */
        char label[11];
        const char *base;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Keep the tail of long names, it is usually the more
         * interesting part since the beginning might just be
         * "systemd-" */
        len = strlen(base);
        if (len > 8)
                base += len - 8;

        snprintf(label, sizeof(label), "(%s)", base);

        rename_process(label);
}
907
908 static int apply_seccomp(uint32_t *syscall_filter) {
909         static const struct sock_filter header[] = {
910                 VALIDATE_ARCHITECTURE,
911                 EXAMINE_SYSCALL
912         };
913         static const struct sock_filter footer[] = {
914                 _KILL_PROCESS
915         };
916
917         int i;
918         unsigned n;
919         struct sock_filter *f;
920         struct sock_fprog prog;
921
922         assert(syscall_filter);
923
924         /* First: count the syscalls to check for */
925         for (i = 0, n = 0; i < syscall_max(); i++)
926                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
927                         n++;
928
929         /* Second: build the filter program from a header the syscall
930          * matches and the footer */
931         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
932         memcpy(f, header, sizeof(header));
933
934         for (i = 0, n = 0; i < syscall_max(); i++)
935                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
936                         struct sock_filter item[] = {
937                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
938                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
939                         };
940
941                         assert_cc(ELEMENTSOF(item) == 2);
942
943                         f[ELEMENTSOF(header) + 2*n]  = item[0];
944                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
945
946                         n++;
947                 }
948
949         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
950
951         /* Third: install the filter */
952         zero(prog);
953         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
954         prog.filter = f;
955         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
956                 return -errno;
957
958         return 0;
959 }
960
/* Forks a child process that applies the settings from 'context' and
 * then execve()s command->path. The parent logs the fork, adds the
 * new PID to the cgroups, records the start in command->exec_status
 * and stores the PID in *ret.
 *
 *   command           - supplies the binary path and the default argv
 *   argv              - overrides command->argv when non-NULL
 *   context           - execution settings read by the child
 *   fds, n_fds        - file descriptors kept open for the child; if
 *                       any of std_input/std_output/std_error is in
 *                       socket mode exactly one fd must be passed and
 *                       it is handed to setup_input()/setup_output()
 *                       as socket_fd instead
 *   environment       - merged into the final environment together
 *                       with our_env, context->environment, the
 *                       loaded environment files and the PAM
 *                       environment
 *   apply_permissions - gates group/PAM setup, resource limits,
 *                       capabilities, user switch, securebits,
 *                       no_new_privs and the seccomp filter
 *   apply_chroot      - if true chroot() into context->root_directory
 *                       (when set) and chdir(); otherwise only chdir()
 *                       into root_directory + "/" + working_directory
 *   apply_tty_stdin   - passed through to setup_input()/setup_output()
 *   confirm_spawn     - ask for confirmation before continuing
 *   cgroup_bondings, cgroup_attributes, cgroup_suffix
 *                     - cgroups realized before the fork and joined
 *                       in both the child and the parent
 *   unit_id           - used for log messages and passed to
 *                       setup_output()
 *   idle_pipe         - if non-NULL the child closes idle_pipe[1] and
 *                       waits for POLLHUP on idle_pipe[0] for up to
 *                       IDLE_TIMEOUT_USEC
 *   ret               - receives the PID of the forked child
 *
 * Returns 0 on success. In the parent, failures are reported as a
 * negative value: -EINVAL for a bad fd count in socket mode, the
 * error from exec_context_load_environment(), the result of
 * log_oom(), the error from cgroup_bonding_realize_list() or -errno
 * from fork(). Failures in the child are logged there and reported
 * through its exit status (one of the EXIT_* values stored in r). */
961 int exec_spawn(ExecCommand *command,
962                char **argv,
963                const ExecContext *context,
964                int fds[], unsigned n_fds,
965                char **environment,
966                bool apply_permissions,
967                bool apply_chroot,
968                bool apply_tty_stdin,
969                bool confirm_spawn,
970                CGroupBonding *cgroup_bondings,
971                CGroupAttribute *cgroup_attributes,
972                const char *cgroup_suffix,
973                const char *unit_id,
974                int idle_pipe[2],
975                pid_t *ret) {
976
977         pid_t pid;
978         int r;
979         char *line;
980         int socket_fd;
981         char _cleanup_strv_free_ **files_env = NULL;
982
983         assert(command);
984         assert(context);
985         assert(ret);
986         assert(fds || n_fds <= 0);
987
             /* Socket mode: the single passed fd becomes socket_fd and
              * fds/n_fds are cleared, so it is no longer handled as
              * one of the fds that are kept open and advertised via
              * LISTEN_FDS further down. */
988         if (context->std_input == EXEC_INPUT_SOCKET ||
989             context->std_output == EXEC_OUTPUT_SOCKET ||
990             context->std_error == EXEC_OUTPUT_SOCKET) {
991
992                 if (n_fds != 1)
993                         return -EINVAL;
994
995                 socket_fd = fds[0];
996
997                 fds = NULL;
998                 n_fds = 0;
999         } else
1000                 socket_fd = -1;
1001
1002         r = exec_context_load_environment(context, &files_env);
1003         if (r < 0) {
1004                 log_struct_unit(LOG_ERR,
1005                            unit_id,
1006                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1007                            "ERRNO=%d", -r,
1008                            NULL);
1009                 return r;
1010         }
1011
1012         if (!argv)
1013                 argv = command->argv;
1014
1015         line = exec_command_line(argv);
1016         if (!line)
1017                 return log_oom();
1018
1019         log_struct_unit(LOG_DEBUG,
1020                    unit_id,
1021                    "MESSAGE=About to execute %s", line,
1022                    NULL);
1023         free(line);
1024
1025         r = cgroup_bonding_realize_list(cgroup_bondings);
1026         if (r < 0)
1027                 return r;
1028
1029         cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1030
1031         pid = fork();
1032         if (pid < 0)
1033                 return -errno;
1034
1035         if (pid == 0) {
                     /* In the child 'err' carries a negative
                      * errno-style value for the failure log and 'r'
                      * the EXIT_* code passed to _exit(). */
1036                 int i, err;
1037                 sigset_t ss;
1038                 const char *username = NULL, *home = NULL;
1039                 uid_t uid = (uid_t) -1;
1040                 gid_t gid = (gid_t) -1;
1041                 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1042                         **final_env = NULL, **final_argv = NULL;
1043                 unsigned n_env = 0;
1044                 bool set_access = false;
1045
1046                 /* child */
1047
1048                 rename_process_from_path(command->path);
1049
1050                 /* We reset exactly these signals, since they are the
1051                  * only ones we set to SIG_IGN in the main daemon. All
1052                  * others we leave untouched because we set them to
1053                  * SIG_DFL or a valid handler initially, both of which
1054                  * will be demoted to SIG_DFL. */
1055                 default_signals(SIGNALS_CRASH_HANDLER,
1056                                 SIGNALS_IGNORE, -1);
1057
1058                 if (context->ignore_sigpipe)
1059                         ignore_signals(SIGPIPE, -1);
1060
1061                 assert_se(sigemptyset(&ss) == 0);
1062                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1063                         err = -errno;
1064                         r = EXIT_SIGNAL_MASK;
1065                         goto fail_child;
1066                 }
1067
1068                 if (idle_pipe) {
1069                         if (idle_pipe[1] >= 0)
1070                                 close_nointr_nofail(idle_pipe[1]);
1071                         if (idle_pipe[0] >= 0) {
1072                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1073                                 close_nointr_nofail(idle_pipe[0]);
1074                         }
1075                 }
1076
1077                 /* Close sockets very early to make sure we don't
1078                  * block init reexecution because it cannot bind its
1079                  * sockets */
1080                 log_forget_fds();
1081                 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1082                                            socket_fd >= 0 ? 1 : n_fds);
1083                 if (err < 0) {
1084                         r = EXIT_FDS;
1085                         goto fail_child;
1086                 }
1087
1088                 if (!context->same_pgrp)
1089                         if (setsid() < 0) {
1090                                 err = -errno;
1091                                 r = EXIT_SETSID;
1092                                 goto fail_child;
1093                         }
1094
1095                 if (context->tcpwrap_name) {
1096                         if (socket_fd >= 0)
1097                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1098                                         err = -EACCES;
1099                                         r = EXIT_TCPWRAP;
1100                                         goto fail_child;
1101                                 }
1102
1103                         for (i = 0; i < (int) n_fds; i++) {
1104                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1105                                         err = -EACCES;
1106                                         r = EXIT_TCPWRAP;
1107                                         goto fail_child;
1108                                 }
1109                         }
1110                 }
1111
1112                 exec_context_tty_reset(context);
1113
1114                 if (confirm_spawn) {
1115                         char response;
1116
1117                         err = ask_for_confirmation(&response, argv);
1118                         if (err == -ETIMEDOUT)
1119                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1120                         else if (err < 0)
1121                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1122                         else if (response == 's') {
1123                                 write_confirm_message("Skipping execution.\n");
1124                                 err = -ECANCELED;
1125                                 r = EXIT_CONFIRM;
1126                                 goto fail_child;
1127                         } else if (response == 'n') {
1128                                 write_confirm_message("Failing execution.\n");
                                     /* NOTE(review): with r == 0 the
                                      * failure log at fail_child is
                                      * skipped and the child exits
                                      * with status 0 -- confirm this
                                      * is intended given the message
                                      * above. */
1129                                 err = r = 0;
1130                                 goto fail_child;
1131                         }
1132                 }
1133
1134                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1135                  * must be sure to drop O_NONBLOCK */
1136                 if (socket_fd >= 0)
1137                         fd_nonblock(socket_fd, false);
1138
1139                 err = setup_input(context, socket_fd, apply_tty_stdin);
1140                 if (err < 0) {
1141                         r = EXIT_STDIN;
1142                         goto fail_child;
1143                 }
1144
1145                 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1146                 if (err < 0) {
1147                         r = EXIT_STDOUT;
1148                         goto fail_child;
1149                 }
1150
1151                 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1152                 if (err < 0) {
1153                         r = EXIT_STDERR;
1154                         goto fail_child;
1155                 }
1156
1157                 if (cgroup_bondings) {
1158                         err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1159                         if (err < 0) {
1160                                 r = EXIT_CGROUP;
1161                                 goto fail_child;
1162                         }
1163                 }
1164
1165                 if (context->oom_score_adjust_set) {
1166                         char t[16];
1167
1168                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1169                         char_array_0(t);
1170
                             /* NOTE(review): err is taken from errno
                              * rather than from the helper's return
                              * value -- confirm that
                              * write_one_line_file() leaves errno
                              * set on failure. */
1171                         if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1172                                 err = -errno;
1173                                 r = EXIT_OOM_ADJUST;
1174                                 goto fail_child;
1175                         }
1176                 }
1177
1178                 if (context->nice_set)
1179                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1180                                 err = -errno;
1181                                 r = EXIT_NICE;
1182                                 goto fail_child;
1183                         }
1184
1185                 if (context->cpu_sched_set) {
1186                         struct sched_param param;
1187
1188                         zero(param);
1189                         param.sched_priority = context->cpu_sched_priority;
1190
1191                         if (sched_setscheduler(0, context->cpu_sched_policy |
1192                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1193                                 err = -errno;
1194                                 r = EXIT_SETSCHEDULER;
1195                                 goto fail_child;
1196                         }
1197                 }
1198
1199                 if (context->cpuset)
1200                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1201                                 err = -errno;
1202                                 r = EXIT_CPUAFFINITY;
1203                                 goto fail_child;
1204                         }
1205
1206                 if (context->ioprio_set)
1207                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1208                                 err = -errno;
1209                                 r = EXIT_IOPRIO;
1210                                 goto fail_child;
1211                         }
1212
1213                 if (context->timer_slack_nsec != (nsec_t) -1)
1214                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1215                                 err = -errno;
1216                                 r = EXIT_TIMERSLACK;
1217                                 goto fail_child;
1218                         }
1219
1220                 if (context->utmp_id)
1221                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1222
1223                 if (context->user) {
1224                         username = context->user;
1225                         err = get_user_creds(&username, &uid, &gid, &home, NULL);
1226                         if (err < 0) {
1227                                 r = EXIT_USER;
1228                                 goto fail_child;
1229                         }
1230
1231                         if (is_terminal_input(context->std_input)) {
1232                                 err = chown_terminal(STDIN_FILENO, uid);
1233                                 if (err < 0) {
1234                                         r = EXIT_STDIN;
1235                                         goto fail_child;
1236                                 }
1237                         }
1238
1239                         if (cgroup_bondings && context->control_group_modify) {
1240                                 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1241                                 if (err >= 0)
1242                                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1243                                 if (err < 0) {
1244                                         r = EXIT_CGROUP;
1245                                         goto fail_child;
1246                                 }
1247
1248                                 set_access = true;
1249                         }
1250                 }
1251
                     /* NOTE(review): the fourth argument below sits
                      * in the slot that receives 'gid' in the call
                      * above, yet it is cast to uid_t -- presumably
                      * (gid_t) -1 was meant; confirm. */
1252                 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
1253                         err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1254                         if (err < 0) {
1255                                 r = EXIT_CGROUP;
1256                                 goto fail_child;
1257                         }
1258                 }
1259
1260                 if (apply_permissions) {
1261                         err = enforce_groups(context, username, gid);
1262                         if (err < 0) {
1263                                 r = EXIT_GROUP;
1264                                 goto fail_child;
1265                         }
1266                 }
1267
1268                 umask(context->umask);
1269
1270 #ifdef HAVE_PAM
1271                 if (apply_permissions && context->pam_name && username) {
1272                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1273                         if (err < 0) {
1274                                 r = EXIT_PAM;
1275                                 goto fail_child;
1276                         }
1277                 }
1278 #endif
1279                 if (context->private_network) {
1280                         if (unshare(CLONE_NEWNET) < 0) {
1281                                 err = -errno;
1282                                 r = EXIT_NETWORK;
1283                                 goto fail_child;
1284                         }
1285
1286                         loopback_setup();
1287                 }
1288
1289                 if (strv_length(context->read_write_dirs) > 0 ||
1290                     strv_length(context->read_only_dirs) > 0 ||
1291                     strv_length(context->inaccessible_dirs) > 0 ||
1292                     context->mount_flags != 0 ||
1293                     context->private_tmp) {
1294                         err = setup_namespace(context->read_write_dirs,
1295                                               context->read_only_dirs,
1296                                               context->inaccessible_dirs,
1297                                               context->private_tmp,
1298                                               context->mount_flags);
1299                         if (err < 0) {
1300                                 r = EXIT_NAMESPACE;
1301                                 goto fail_child;
1302                         }
1303                 }
1304
1305                 if (apply_chroot) {
1306                         if (context->root_directory)
1307                                 if (chroot(context->root_directory) < 0) {
1308                                         err = -errno;
1309                                         r = EXIT_CHROOT;
1310                                         goto fail_child;
1311                                 }
1312
1313                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1314                                 err = -errno;
1315                                 r = EXIT_CHDIR;
1316                                 goto fail_child;
1317                         }
1318                 } else {
1319                         char _cleanup_free_ *d = NULL;
1320
1321                         if (asprintf(&d, "%s/%s",
1322                                      context->root_directory ? context->root_directory : "",
1323                                      context->working_directory ? context->working_directory : "") < 0) {
1324                                 err = -ENOMEM;
1325                                 r = EXIT_MEMORY;
1326                                 goto fail_child;
1327                         }
1328
1329                         if (chdir(d) < 0) {
1330                                 err = -errno;
1331                                 r = EXIT_CHDIR;
1332                                 goto fail_child;
1333                         }
1334                 }
1335
1336                 /* We repeat the fd closing here, to make sure that
1337                  * nothing is leaked from the PAM modules */
1338                 err = close_all_fds(fds, n_fds);
1339                 if (err >= 0)
1340                         err = shift_fds(fds, n_fds);
1341                 if (err >= 0)
1342                         err = flags_fds(fds, n_fds, context->non_blocking);
1343                 if (err < 0) {
1344                         r = EXIT_FDS;
1345                         goto fail_child;
1346                 }
1347
1348                 if (apply_permissions) {
1349
1350                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1351                                 if (!context->rlimit[i])
1352                                         continue;
1353
1354                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1355                                         err = -errno;
1356                                         r = EXIT_LIMITS;
1357                                         goto fail_child;
1358                                 }
1359                         }
1360
1361                         if (context->capability_bounding_set_drop) {
1362                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1363                                 if (err < 0) {
1364                                         r = EXIT_CAPABILITIES;
1365                                         goto fail_child;
1366                                 }
1367                         }
1368
1369                         if (context->user) {
1370                                 err = enforce_user(context, uid);
1371                                 if (err < 0) {
1372                                         r = EXIT_USER;
1373                                         goto fail_child;
1374                                 }
1375                         }
1376
1377                         /* PR_GET_SECUREBITS is not privileged, while
1378                          * PR_SET_SECUREBITS is. So to suppress
1379                          * potential EPERMs we'll try not to call
1380                          * PR_SET_SECUREBITS unless necessary. */
1381                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1382                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1383                                         err = -errno;
1384                                         r = EXIT_SECUREBITS;
1385                                         goto fail_child;
1386                                 }
1387
1388                         if (context->capabilities)
1389                                 if (cap_set_proc(context->capabilities) < 0) {
1390                                         err = -errno;
1391                                         r = EXIT_CAPABILITIES;
1392                                         goto fail_child;
1393                                 }
1394
1395                         if (context->no_new_privileges)
1396                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1397                                         err = -errno;
1398                                         r = EXIT_NO_NEW_PRIVILEGES;
1399                                         goto fail_child;
1400                                 }
1401
1402                         if (context->syscall_filter) {
1403                                 err = apply_seccomp(context->syscall_filter);
1404                                 if (err < 0) {
1405                                         r = EXIT_SECCOMP;
1406                                         goto fail_child;
1407                                 }
1408                         }
1409                 }
1410
                     /* Seven slots: at most six entries are added
                      * below (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME,
                      * USER and the default_term_for_tty() string),
                      * which leaves one slot for the terminating
                      * NULL -- presumably new0() zero-fills the
                      * array; confirm. */
1411                 if (!(our_env = new0(char*, 7))) {
1412                         err = -ENOMEM;
1413                         r = EXIT_MEMORY;
1414                         goto fail_child;
1415                 }
1416
1417                 if (n_fds > 0)
1418                         if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1419                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1420                                 err = -ENOMEM;
1421                                 r = EXIT_MEMORY;
1422                                 goto fail_child;
1423                         }
1424
1425                 if (home)
1426                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1427                                 err = -ENOMEM;
1428                                 r = EXIT_MEMORY;
1429                                 goto fail_child;
1430                         }
1431
1432                 if (username)
1433                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1434                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1435                                 err = -ENOMEM;
1436                                 r = EXIT_MEMORY;
1437                                 goto fail_child;
1438                         }
1439
1440                 if (is_terminal_input(context->std_input) ||
1441                     context->std_output == EXEC_OUTPUT_TTY ||
1442                     context->std_error == EXEC_OUTPUT_TTY)
1443                         if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1444                                 err = -ENOMEM;
1445                                 r = EXIT_MEMORY;
1446                                 goto fail_child;
1447                         }
1448
1449                 assert(n_env <= 7);
1450
1451                 if (!(final_env = strv_env_merge(
1452                                       5,
1453                                       environment,
1454                                       our_env,
1455                                       context->environment,
1456                                       files_env,
1457                                       pam_env,
1458                                       NULL))) {
1459                         err = -ENOMEM;
1460                         r = EXIT_MEMORY;
1461                         goto fail_child;
1462                 }
1463
1464                 if (!(final_argv = replace_env_argv(argv, final_env))) {
1465                         err = -ENOMEM;
1466                         r = EXIT_MEMORY;
1467                         goto fail_child;
1468                 }
1469
1470                 final_env = strv_env_clean(final_env);
1471
                     /* execve() only returns on failure, in which
                      * case we fall through into fail_child. */
1472                 execve(command->path, final_argv, final_env);
1473                 err = -errno;
1474                 r = EXIT_EXEC;
1475
1476         fail_child:
1477                 if (r != 0) {
1478                         log_open();
1479                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1480                                    "EXECUTABLE=%s", command->path,
1481                                    "MESSAGE=Failed at step %s spawning %s: %s",
1482                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1483                                           command->path, strerror(-err),
1484                                    "ERRNO=%d", -err,
1485                                    NULL);
1486                         log_close();
1487                 }
1488
1489                 _exit(r);
1490         }
1491
1492         log_struct_unit(LOG_DEBUG,
1493                    unit_id,
1494                    "MESSAGE=Forked %s as %lu",
1495                           command->path, (unsigned long) pid,
1496                    NULL);
1497
1498         /* We add the new process to the cgroup both in the child (so
1499          * that we can be sure that no user code is ever executed
1500          * outside of the cgroup) and in the parent (so that we can be
1501          * sure that when we kill the cgroup the process will be
1502          * killed too). */
1503         if (cgroup_bondings)
1504                 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1505
1506         exec_status_start(&command->exec_status, pid);
1507
1508         *ret = pid;
1509         return 0;
1510 }
1511
1512 void exec_context_init(ExecContext *c) {
1513         assert(c);
1514
1515         c->umask = 0022;
1516         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1517         c->cpu_sched_policy = SCHED_OTHER;
1518         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1519         c->syslog_level_prefix = true;
1520         c->control_group_persistent = -1;
1521         c->ignore_sigpipe = true;
1522         c->timer_slack_nsec = (nsec_t) -1;
1523 }
1524
1525 void exec_context_done(ExecContext *c) {
1526         unsigned l;
1527
1528         assert(c);
1529
1530         strv_free(c->environment);
1531         c->environment = NULL;
1532
1533         strv_free(c->environment_files);
1534         c->environment_files = NULL;
1535
1536         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1537                 free(c->rlimit[l]);
1538                 c->rlimit[l] = NULL;
1539         }
1540
1541         free(c->working_directory);
1542         c->working_directory = NULL;
1543         free(c->root_directory);
1544         c->root_directory = NULL;
1545
1546         free(c->tty_path);
1547         c->tty_path = NULL;
1548
1549         free(c->tcpwrap_name);
1550         c->tcpwrap_name = NULL;
1551
1552         free(c->syslog_identifier);
1553         c->syslog_identifier = NULL;
1554
1555         free(c->user);
1556         c->user = NULL;
1557
1558         free(c->group);
1559         c->group = NULL;
1560
1561         strv_free(c->supplementary_groups);
1562         c->supplementary_groups = NULL;
1563
1564         free(c->pam_name);
1565         c->pam_name = NULL;
1566
1567         if (c->capabilities) {
1568                 cap_free(c->capabilities);
1569                 c->capabilities = NULL;
1570         }
1571
1572         strv_free(c->read_only_dirs);
1573         c->read_only_dirs = NULL;
1574
1575         strv_free(c->read_write_dirs);
1576         c->read_write_dirs = NULL;
1577
1578         strv_free(c->inaccessible_dirs);
1579         c->inaccessible_dirs = NULL;
1580
1581         if (c->cpuset)
1582                 CPU_FREE(c->cpuset);
1583
1584         free(c->utmp_id);
1585         c->utmp_id = NULL;
1586
1587         free(c->syscall_filter);
1588         c->syscall_filter = NULL;
1589 }
1590
1591 void exec_command_done(ExecCommand *c) {
1592         assert(c);
1593
1594         free(c->path);
1595         c->path = NULL;
1596
1597         strv_free(c->argv);
1598         c->argv = NULL;
1599 }
1600
1601 void exec_command_done_array(ExecCommand *c, unsigned n) {
1602         unsigned i;
1603
1604         for (i = 0; i < n; i++)
1605                 exec_command_done(c+i);
1606 }
1607
1608 void exec_command_free_list(ExecCommand *c) {
1609         ExecCommand *i;
1610
1611         while ((i = c)) {
1612                 LIST_REMOVE(ExecCommand, command, c, i);
1613                 exec_command_done(i);
1614                 free(i);
1615         }
1616 }
1617
1618 void exec_command_free_array(ExecCommand **c, unsigned n) {
1619         unsigned i;
1620
1621         for (i = 0; i < n; i++) {
1622                 exec_command_free_list(c[i]);
1623                 c[i] = NULL;
1624         }
1625 }
1626
1627 int exec_context_load_environment(const ExecContext *c, char ***l) {
1628         char **i, **r = NULL;
1629
1630         assert(c);
1631         assert(l);
1632
1633         STRV_FOREACH(i, c->environment_files) {
1634                 char *fn;
1635                 int k;
1636                 bool ignore = false;
1637                 char **p;
1638                 glob_t pglob;
1639                 int count, n;
1640
1641                 fn = *i;
1642
1643                 if (fn[0] == '-') {
1644                         ignore = true;
1645                         fn ++;
1646                 }
1647
1648                 if (!path_is_absolute(fn)) {
1649
1650                         if (ignore)
1651                                 continue;
1652
1653                         strv_free(r);
1654                         return -EINVAL;
1655                 }
1656
1657                 /* Filename supports globbing, take all matching files */
1658                 zero(pglob);
1659                 errno = 0;
1660                 if (glob(fn, 0, NULL, &pglob) != 0) {
1661                         globfree(&pglob);
1662                         if (ignore)
1663                                 continue;
1664
1665                         strv_free(r);
1666                         return errno ? -errno : -EINVAL;
1667                 }
1668                 count = pglob.gl_pathc;
1669                 if (count == 0) {
1670                         globfree(&pglob);
1671                         if (ignore)
1672                                 continue;
1673
1674                         strv_free(r);
1675                         return -EINVAL;
1676                 }
1677                 for (n = 0; n < count; n++) {
1678                         k = load_env_file(pglob.gl_pathv[n], &p);
1679                         if (k < 0) {
1680                                 if (ignore)
1681                                         continue;
1682
1683                                 strv_free(r);
1684                                 globfree(&pglob);
1685                                 return k;
1686                          }
1687
1688                         if (r == NULL)
1689                                 r = p;
1690                         else {
1691                                 char **m;
1692
1693                                 m = strv_env_merge(2, r, p);
1694                                 strv_free(r);
1695                                 strv_free(p);
1696
1697                                 if (!m) {
1698                                         globfree(&pglob);
1699                                         return -ENOMEM;
1700                                 }
1701
1702                                 r = m;
1703                         }
1704                 }
1705                 globfree(&pglob);
1706         }
1707
1708         *l = r;
1709
1710         return 0;
1711 }
1712
1713 static void strv_fprintf(FILE *f, char **l) {
1714         char **g;
1715
1716         assert(f);
1717
1718         STRV_FOREACH(g, l)
1719                 fprintf(f, " %s", *g);
1720 }
1721
1722 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1723         char ** e;
1724         unsigned i;
1725
1726         assert(c);
1727         assert(f);
1728
1729         if (!prefix)
1730                 prefix = "";
1731
1732         fprintf(f,
1733                 "%sUMask: %04o\n"
1734                 "%sWorkingDirectory: %s\n"
1735                 "%sRootDirectory: %s\n"
1736                 "%sNonBlocking: %s\n"
1737                 "%sPrivateTmp: %s\n"
1738                 "%sControlGroupModify: %s\n"
1739                 "%sControlGroupPersistent: %s\n"
1740                 "%sPrivateNetwork: %s\n"
1741                 "%sIgnoreSIGPIPE: %s\n",
1742                 prefix, c->umask,
1743                 prefix, c->working_directory ? c->working_directory : "/",
1744                 prefix, c->root_directory ? c->root_directory : "/",
1745                 prefix, yes_no(c->non_blocking),
1746                 prefix, yes_no(c->private_tmp),
1747                 prefix, yes_no(c->control_group_modify),
1748                 prefix, yes_no(c->control_group_persistent),
1749                 prefix, yes_no(c->private_network),
1750                 prefix, yes_no(c->ignore_sigpipe));
1751
1752         STRV_FOREACH(e, c->environment)
1753                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1754
1755         STRV_FOREACH(e, c->environment_files)
1756                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1757
1758         if (c->tcpwrap_name)
1759                 fprintf(f,
1760                         "%sTCPWrapName: %s\n",
1761                         prefix, c->tcpwrap_name);
1762
1763         if (c->nice_set)
1764                 fprintf(f,
1765                         "%sNice: %i\n",
1766                         prefix, c->nice);
1767
1768         if (c->oom_score_adjust_set)
1769                 fprintf(f,
1770                         "%sOOMScoreAdjust: %i\n",
1771                         prefix, c->oom_score_adjust);
1772
1773         for (i = 0; i < RLIM_NLIMITS; i++)
1774                 if (c->rlimit[i])
1775                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1776
1777         if (c->ioprio_set) {
1778                 char *class_str;
1779                 int r;
1780
1781                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1782                 if (r < 0)
1783                         class_str = NULL;
1784                 fprintf(f,
1785                         "%sIOSchedulingClass: %s\n"
1786                         "%sIOPriority: %i\n",
1787                         prefix, strna(class_str),
1788                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1789                 free(class_str);
1790         }
1791
1792         if (c->cpu_sched_set) {
1793                 char *policy_str;
1794                 int r;
1795
1796                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1797                 if (r < 0)
1798                         policy_str = NULL;
1799                 fprintf(f,
1800                         "%sCPUSchedulingPolicy: %s\n"
1801                         "%sCPUSchedulingPriority: %i\n"
1802                         "%sCPUSchedulingResetOnFork: %s\n",
1803                         prefix, strna(policy_str),
1804                         prefix, c->cpu_sched_priority,
1805                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1806                 free(policy_str);
1807         }
1808
1809         if (c->cpuset) {
1810                 fprintf(f, "%sCPUAffinity:", prefix);
1811                 for (i = 0; i < c->cpuset_ncpus; i++)
1812                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1813                                 fprintf(f, " %i", i);
1814                 fputs("\n", f);
1815         }
1816
1817         if (c->timer_slack_nsec != (nsec_t) -1)
1818                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1819
1820         fprintf(f,
1821                 "%sStandardInput: %s\n"
1822                 "%sStandardOutput: %s\n"
1823                 "%sStandardError: %s\n",
1824                 prefix, exec_input_to_string(c->std_input),
1825                 prefix, exec_output_to_string(c->std_output),
1826                 prefix, exec_output_to_string(c->std_error));
1827
1828         if (c->tty_path)
1829                 fprintf(f,
1830                         "%sTTYPath: %s\n"
1831                         "%sTTYReset: %s\n"
1832                         "%sTTYVHangup: %s\n"
1833                         "%sTTYVTDisallocate: %s\n",
1834                         prefix, c->tty_path,
1835                         prefix, yes_no(c->tty_reset),
1836                         prefix, yes_no(c->tty_vhangup),
1837                         prefix, yes_no(c->tty_vt_disallocate));
1838
1839         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1840             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1841             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1842             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1843                 char *fac_str, *lvl_str;
1844                 int r;
1845
1846                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1847                 if (r < 0)
1848                         fac_str = NULL;
1849
1850                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1851                 if (r < 0)
1852                         lvl_str = NULL;
1853
1854                 fprintf(f,
1855                         "%sSyslogFacility: %s\n"
1856                         "%sSyslogLevel: %s\n",
1857                         prefix, strna(fac_str),
1858                         prefix, strna(lvl_str));
1859                 free(lvl_str);
1860                 free(fac_str);
1861         }
1862
1863         if (c->capabilities) {
1864                 char *t;
1865                 if ((t = cap_to_text(c->capabilities, NULL))) {
1866                         fprintf(f, "%sCapabilities: %s\n",
1867                                 prefix, t);
1868                         cap_free(t);
1869                 }
1870         }
1871
1872         if (c->secure_bits)
1873                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1874                         prefix,
1875                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1876                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1877                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1878                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1879                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1880                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1881
1882         if (c->capability_bounding_set_drop) {
1883                 unsigned long l;
1884                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1885
1886                 for (l = 0; l <= cap_last_cap(); l++)
1887                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1888                                 char *t;
1889
1890                                 if ((t = cap_to_name(l))) {
1891                                         fprintf(f, " %s", t);
1892                                         cap_free(t);
1893                                 }
1894                         }
1895
1896                 fputs("\n", f);
1897         }
1898
1899         if (c->user)
1900                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1901         if (c->group)
1902                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1903
1904         if (strv_length(c->supplementary_groups) > 0) {
1905                 fprintf(f, "%sSupplementaryGroups:", prefix);
1906                 strv_fprintf(f, c->supplementary_groups);
1907                 fputs("\n", f);
1908         }
1909
1910         if (c->pam_name)
1911                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1912
1913         if (strv_length(c->read_write_dirs) > 0) {
1914                 fprintf(f, "%sReadWriteDirs:", prefix);
1915                 strv_fprintf(f, c->read_write_dirs);
1916                 fputs("\n", f);
1917         }
1918
1919         if (strv_length(c->read_only_dirs) > 0) {
1920                 fprintf(f, "%sReadOnlyDirs:", prefix);
1921                 strv_fprintf(f, c->read_only_dirs);
1922                 fputs("\n", f);
1923         }
1924
1925         if (strv_length(c->inaccessible_dirs) > 0) {
1926                 fprintf(f, "%sInaccessibleDirs:", prefix);
1927                 strv_fprintf(f, c->inaccessible_dirs);
1928                 fputs("\n", f);
1929         }
1930
1931         if (c->utmp_id)
1932                 fprintf(f,
1933                         "%sUtmpIdentifier: %s\n",
1934                         prefix, c->utmp_id);
1935 }
1936
1937 void exec_status_start(ExecStatus *s, pid_t pid) {
1938         assert(s);
1939
1940         zero(*s);
1941         s->pid = pid;
1942         dual_timestamp_get(&s->start_timestamp);
1943 }
1944
1945 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1946         assert(s);
1947
1948         if (s->pid && s->pid != pid)
1949                 zero(*s);
1950
1951         s->pid = pid;
1952         dual_timestamp_get(&s->exit_timestamp);
1953
1954         s->code = code;
1955         s->status = status;
1956
1957         if (context) {
1958                 if (context->utmp_id)
1959                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1960
1961                 exec_context_tty_reset(context);
1962         }
1963 }
1964
1965 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1966         char buf[FORMAT_TIMESTAMP_MAX];
1967
1968         assert(s);
1969         assert(f);
1970
1971         if (!prefix)
1972                 prefix = "";
1973
1974         if (s->pid <= 0)
1975                 return;
1976
1977         fprintf(f,
1978                 "%sPID: %lu\n",
1979                 prefix, (unsigned long) s->pid);
1980
1981         if (s->start_timestamp.realtime > 0)
1982                 fprintf(f,
1983                         "%sStart Timestamp: %s\n",
1984                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1985
1986         if (s->exit_timestamp.realtime > 0)
1987                 fprintf(f,
1988                         "%sExit Timestamp: %s\n"
1989                         "%sExit Code: %s\n"
1990                         "%sExit Status: %i\n",
1991                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1992                         prefix, sigchld_code_to_string(s->code),
1993                         prefix, s->status);
1994 }
1995
1996 char *exec_command_line(char **argv) {
1997         size_t k;
1998         char *n, *p, **a;
1999         bool first = true;
2000
2001         assert(argv);
2002
2003         k = 1;
2004         STRV_FOREACH(a, argv)
2005                 k += strlen(*a)+3;
2006
2007         if (!(n = new(char, k)))
2008                 return NULL;
2009
2010         p = n;
2011         STRV_FOREACH(a, argv) {
2012
2013                 if (!first)
2014                         *(p++) = ' ';
2015                 else
2016                         first = false;
2017
2018                 if (strpbrk(*a, WHITESPACE)) {
2019                         *(p++) = '\'';
2020                         p = stpcpy(p, *a);
2021                         *(p++) = '\'';
2022                 } else
2023                         p = stpcpy(p, *a);
2024
2025         }
2026
2027         *p = 0;
2028
2029         /* FIXME: this doesn't really handle arguments that have
2030          * spaces and ticks in them */
2031
2032         return n;
2033 }
2034
2035 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2036         char *p2;
2037         const char *prefix2;
2038
2039         char *cmd;
2040
2041         assert(c);
2042         assert(f);
2043
2044         if (!prefix)
2045                 prefix = "";
2046         p2 = strappend(prefix, "\t");
2047         prefix2 = p2 ? p2 : prefix;
2048
2049         cmd = exec_command_line(c->argv);
2050
2051         fprintf(f,
2052                 "%sCommand Line: %s\n",
2053                 prefix, cmd ? cmd : strerror(ENOMEM));
2054
2055         free(cmd);
2056
2057         exec_status_dump(&c->exec_status, f, prefix2);
2058
2059         free(p2);
2060 }
2061
2062 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2063         assert(f);
2064
2065         if (!prefix)
2066                 prefix = "";
2067
2068         LIST_FOREACH(command, c, c)
2069                 exec_command_dump(c, f, prefix);
2070 }
2071
2072 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2073         ExecCommand *end;
2074
2075         assert(l);
2076         assert(e);
2077
2078         if (*l) {
2079                 /* It's kind of important, that we keep the order here */
2080                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2081                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2082         } else
2083               *l = e;
2084 }
2085
2086 int exec_command_set(ExecCommand *c, const char *path, ...) {
2087         va_list ap;
2088         char **l, *p;
2089
2090         assert(c);
2091         assert(path);
2092
2093         va_start(ap, path);
2094         l = strv_new_ap(path, ap);
2095         va_end(ap);
2096
2097         if (!l)
2098                 return -ENOMEM;
2099
2100         if (!(p = strdup(path))) {
2101                 strv_free(l);
2102                 return -ENOMEM;
2103         }
2104
2105         free(c->path);
2106         c->path = p;
2107
2108         strv_free(c->argv);
2109         c->argv = l;
2110
2111         return 0;
2112 }
2113
2114 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2115         [EXEC_INPUT_NULL] = "null",
2116         [EXEC_INPUT_TTY] = "tty",
2117         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2118         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2119         [EXEC_INPUT_SOCKET] = "socket"
2120 };
2121
2122 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2123
2124 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2125         [EXEC_OUTPUT_INHERIT] = "inherit",
2126         [EXEC_OUTPUT_NULL] = "null",
2127         [EXEC_OUTPUT_TTY] = "tty",
2128         [EXEC_OUTPUT_SYSLOG] = "syslog",
2129         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2130         [EXEC_OUTPUT_KMSG] = "kmsg",
2131         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2132         [EXEC_OUTPUT_JOURNAL] = "journal",
2133         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2134         [EXEC_OUTPUT_SOCKET] = "socket"
2135 };
2136
2137 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);