chiark / gitweb /
b878c331d58b6c642726b2e0effbc89098baefde
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43
44 #ifdef HAVE_PAM
45 #include <security/pam_appl.h>
46 #endif
47
48 #include "execute.h"
49 #include "strv.h"
50 #include "macro.h"
51 #include "capability.h"
52 #include "util.h"
53 #include "log.h"
54 #include "sd-messages.h"
55 #include "ioprio.h"
56 #include "securebits.h"
57 #include "cgroup.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71
72 /* This assumes there is a 'tty' group */
73 #define TTY_MODE 0620
74
/* Relocates the passed descriptors so that fds[k] ends up as fd number
 * k+3. The fds[] array is rewritten in place with the new numbers.
 * Returns 0 on success or a negative errno if F_DUPFD fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first, retry_at;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        first = 0;
        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, moved;

                        /* This one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, wanted);
                        if (moved < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = moved;

                        /* The slot was still occupied, so we got a
                         * higher number instead. Remember the first
                         * such index and make another pass from
                         * there. */
                        if (moved != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
118
/* Applies the requested O_NONBLOCK state to each of the passed
 * descriptors and clears FD_CLOEXEC on them. Stops at the first
 * failure and returns that helper's negative return value; returns 0
 * otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *p, *end;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (p = fds, end = fds + n_fds; p < end; p++) {
                int r;

                r = fd_nonblock(*p, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors
                 * are meant to be handed to our children */
                r = fd_cloexec(*p, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
145
146 static const char *tty_path(const ExecContext *context) {
147         assert(context);
148
149         if (context->tty_path)
150                 return context->tty_path;
151
152         return "/dev/console";
153 }
154
155 void exec_context_tty_reset(const ExecContext *context) {
156         assert(context);
157
158         if (context->tty_vhangup)
159                 terminal_vhangup(tty_path(context));
160
161         if (context->tty_reset)
162                 reset_terminal(tty_path(context));
163
164         if (context->tty_vt_disallocate && context->tty_path)
165                 vt_disallocate(context->tty_path);
166 }
167
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative
 * errno on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Landed on the right number already, nothing to move */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
184
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
186         int fd, r;
187         union sockaddr_union sa;
188
189         assert(context);
190         assert(output < _EXEC_OUTPUT_MAX);
191         assert(ident);
192         assert(nfd >= 0);
193
194         fd = socket(AF_UNIX, SOCK_STREAM, 0);
195         if (fd < 0)
196                 return -errno;
197
198         zero(sa);
199         sa.un.sun_family = AF_UNIX;
200         strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
201
202         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
203         if (r < 0) {
204                 close_nointr_nofail(fd);
205                 return -errno;
206         }
207
208         if (shutdown(fd, SHUT_RD) < 0) {
209                 close_nointr_nofail(fd);
210                 return -errno;
211         }
212
213         dprintf(fd,
214                 "%s\n"
215                 "%s\n"
216                 "%i\n"
217                 "%i\n"
218                 "%i\n"
219                 "%i\n"
220                 "%i\n",
221                 context->syslog_identifier ? context->syslog_identifier : ident,
222                 unit_id,
223                 context->syslog_priority,
224                 !!context->syslog_level_prefix,
225                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227                 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
228
229         if (fd != nfd) {
230                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231                 close_nointr_nofail(fd);
232         } else
233                 r = nfd;
234
235         return r;
236 }
/* Opens the terminal at 'path' via open_terminal(), passing 'mode'
 * with O_NOCTTY added, and makes it available as descriptor nfd.
 * Returns nfd on success; on failure returns open_terminal()'s
 * negative result or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        /* Landed on the right number already, nothing to move */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(fd);

        return r;
}
254
255 static bool is_terminal_input(ExecInput i) {
256         return
257                 i == EXEC_INPUT_TTY ||
258                 i == EXEC_INPUT_TTY_FORCE ||
259                 i == EXEC_INPUT_TTY_FAIL;
260 }
261
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
263
264         if (is_terminal_input(std_input) && !apply_tty_stdin)
265                 return EXEC_INPUT_NULL;
266
267         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268                 return EXEC_INPUT_NULL;
269
270         return std_input;
271 }
272
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
274
275         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276                 return EXEC_OUTPUT_INHERIT;
277
278         return std_output;
279 }
280
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
282         ExecInput i;
283
284         assert(context);
285
286         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
287
288         switch (i) {
289
290         case EXEC_INPUT_NULL:
291                 return open_null_as(O_RDONLY, STDIN_FILENO);
292
293         case EXEC_INPUT_TTY:
294         case EXEC_INPUT_TTY_FORCE:
295         case EXEC_INPUT_TTY_FAIL: {
296                 int fd, r;
297
298                 if ((fd = acquire_terminal(
299                                      tty_path(context),
300                                      i == EXEC_INPUT_TTY_FAIL,
301                                      i == EXEC_INPUT_TTY_FORCE,
302                                      false,
303                                      (usec_t) -1)) < 0)
304                         return fd;
305
306                 if (fd != STDIN_FILENO) {
307                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308                         close_nointr_nofail(fd);
309                 } else
310                         r = STDIN_FILENO;
311
312                 return r;
313         }
314
315         case EXEC_INPUT_SOCKET:
316                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
317
318         default:
319                 assert_not_reached("Unknown input type");
320         }
321 }
322
323 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
324         ExecOutput o;
325         ExecInput i;
326         int r;
327
328         assert(context);
329         assert(ident);
330
331         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332         o = fixup_output(context->std_output, socket_fd);
333
334         /* This expects the input is already set up */
335
336         switch (o) {
337
338         case EXEC_OUTPUT_INHERIT:
339
340                 /* If input got downgraded, inherit the original value */
341                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
342                         return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
343
344                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
345                 if (i != EXEC_INPUT_NULL)
346                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
347
348                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
349                 if (getppid() != 1)
350                         return STDOUT_FILENO;
351
352                 /* We need to open /dev/null here anew, to get the
353                  * right access mode. So we fall through */
354
355         case EXEC_OUTPUT_NULL:
356                 return open_null_as(O_WRONLY, STDOUT_FILENO);
357
358         case EXEC_OUTPUT_TTY:
359                 if (is_terminal_input(i))
360                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
361
362                 /* We don't reset the terminal if this is just about output */
363                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
364
365         case EXEC_OUTPUT_SYSLOG:
366         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
367         case EXEC_OUTPUT_KMSG:
368         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
369         case EXEC_OUTPUT_JOURNAL:
370         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
371                 r = connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
372                 if (r < 0) {
373                         log_error("Failed to connect stdout of %s to the journal socket: %s", unit_id, strerror(-r));
374                         r = open_null_as(O_WRONLY, STDOUT_FILENO);
375                 }
376                 return r;
377
378         case EXEC_OUTPUT_SOCKET:
379                 assert(socket_fd >= 0);
380                 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
381
382         default:
383                 assert_not_reached("Unknown output type");
384         }
385 }
386
387 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
388         ExecOutput o, e;
389         ExecInput i;
390         int r;
391
392         assert(context);
393         assert(ident);
394
395         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
396         o = fixup_output(context->std_output, socket_fd);
397         e = fixup_output(context->std_error, socket_fd);
398
399         /* This expects the input and output are already set up */
400
401         /* Don't change the stderr file descriptor if we inherit all
402          * the way and are not on a tty */
403         if (e == EXEC_OUTPUT_INHERIT &&
404             o == EXEC_OUTPUT_INHERIT &&
405             i == EXEC_INPUT_NULL &&
406             !is_terminal_input(context->std_input) &&
407             getppid () != 1)
408                 return STDERR_FILENO;
409
410         /* Duplicate from stdout if possible */
411         if (e == o || e == EXEC_OUTPUT_INHERIT)
412                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
413
414         switch (e) {
415
416         case EXEC_OUTPUT_NULL:
417                 return open_null_as(O_WRONLY, STDERR_FILENO);
418
419         case EXEC_OUTPUT_TTY:
420                 if (is_terminal_input(i))
421                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
422
423                 /* We don't reset the terminal if this is just about output */
424                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
425
426         case EXEC_OUTPUT_SYSLOG:
427         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
428         case EXEC_OUTPUT_KMSG:
429         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
430         case EXEC_OUTPUT_JOURNAL:
431         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
432                 r = connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
433                 if (r < 0) {
434                         log_error("Failed to connect stderr of %s to the journal socket: %s", unit_id, strerror(-r));
435                         r = open_null_as(O_WRONLY, STDERR_FILENO);
436                 }
437                 return r;
438
439         case EXEC_OUTPUT_SOCKET:
440                 assert(socket_fd >= 0);
441                 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
442
443         default:
444                 assert_not_reached("Unknown error type");
445         }
446 }
447
448 static int chown_terminal(int fd, uid_t uid) {
449         struct stat st;
450
451         assert(fd >= 0);
452
453         /* This might fail. What matters are the results. */
454         (void) fchown(fd, uid, -1);
455         (void) fchmod(fd, TTY_MODE);
456
457         if (fstat(fd, &st) < 0)
458                 return -errno;
459
460         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
461                 return -EPERM;
462
463         return 0;
464 }
465
466 static int setup_confirm_stdio(int *_saved_stdin,
467                                int *_saved_stdout) {
468         int fd = -1, saved_stdin, saved_stdout = -1, r;
469
470         assert(_saved_stdin);
471         assert(_saved_stdout);
472
473         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474         if (saved_stdin < 0)
475                 return -errno;
476
477         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
478         if (saved_stdout < 0) {
479                 r = errno;
480                 goto fail;
481         }
482
483         fd = acquire_terminal(
484                         "/dev/console",
485                         false,
486                         false,
487                         false,
488                         DEFAULT_CONFIRM_USEC);
489         if (fd < 0) {
490                 r = fd;
491                 goto fail;
492         }
493
494         r = chown_terminal(fd, getuid());
495         if (r < 0)
496                 goto fail;
497
498         if (dup2(fd, STDIN_FILENO) < 0) {
499                 r = -errno;
500                 goto fail;
501         }
502
503         if (dup2(fd, STDOUT_FILENO) < 0) {
504                 r = -errno;
505                 goto fail;
506         }
507
508         if (fd >= 2)
509                 close_nointr_nofail(fd);
510
511         *_saved_stdin = saved_stdin;
512         *_saved_stdout = saved_stdout;
513
514         return 0;
515
516 fail:
517         if (saved_stdout >= 0)
518                 close_nointr_nofail(saved_stdout);
519
520         if (saved_stdin >= 0)
521                 close_nointr_nofail(saved_stdin);
522
523         if (fd >= 0)
524                 close_nointr_nofail(fd);
525
526         return r;
527 }
528
529 static int write_confirm_message(const char *format, ...) {
530         int fd;
531         va_list ap;
532
533         assert(format);
534
535         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
536         if (fd < 0)
537                 return fd;
538
539         va_start(ap, format);
540         vdprintf(fd, format, ap);
541         va_end(ap);
542
543         close_nointr_nofail(fd);
544
545         return 0;
546 }
547
/* Undoes setup_confirm_stdio(): releases the terminal, puts the
 * stashed descriptors back onto stdin/stdout and closes the stashed
 * copies. Negative values in *saved_stdin / *saved_stdout are
 * skipped. Returns 0, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        if (*saved_stdin >= 0)
                close_nointr_nofail(*saved_stdin);

        if (*saved_stdout >= 0)
                close_nointr_nofail(*saved_stdout);

        return r;
}
574
/* Asks on the console whether the command line in argv shall be
 * executed. The answer character (one of "yns") is stored in
 * *response by ask().
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and are restored on every path after the redirection
 * succeeded, including when building the command line string fails.
 *
 * Returns the result of ask(), -ENOMEM if the command line could not
 * be formatted, or the negative error of setup_confirm_stdio(). */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line)
                r = -ENOMEM;
        else {
                r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
                free(line);
        }

        /* Always hand stdin/stdout back, otherwise the saved copies
         * leak and the process stays attached to the console */
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
594
595 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
596         bool keep_groups = false;
597         int r;
598
599         assert(context);
600
601         /* Lookup and set GID and supplementary group list. Here too
602          * we avoid NSS lookups for gid=0. */
603
604         if (context->group || username) {
605
606                 if (context->group) {
607                         const char *g = context->group;
608
609                         if ((r = get_group_creds(&g, &gid)) < 0)
610                                 return r;
611                 }
612
613                 /* First step, initialize groups from /etc/groups */
614                 if (username && gid != 0) {
615                         if (initgroups(username, gid) < 0)
616                                 return -errno;
617
618                         keep_groups = true;
619                 }
620
621                 /* Second step, set our gids */
622                 if (setresgid(gid, gid, gid) < 0)
623                         return -errno;
624         }
625
626         if (context->supplementary_groups) {
627                 int ngroups_max, k;
628                 gid_t *gids;
629                 char **i;
630
631                 /* Final step, initialize any manually set supplementary groups */
632                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
633
634                 if (!(gids = new(gid_t, ngroups_max)))
635                         return -ENOMEM;
636
637                 if (keep_groups) {
638                         if ((k = getgroups(ngroups_max, gids)) < 0) {
639                                 free(gids);
640                                 return -errno;
641                         }
642                 } else
643                         k = 0;
644
645                 STRV_FOREACH(i, context->supplementary_groups) {
646                         const char *g;
647
648                         if (k >= ngroups_max) {
649                                 free(gids);
650                                 return -E2BIG;
651                         }
652
653                         g = *i;
654                         r = get_group_creds(&g, gids+k);
655                         if (r < 0) {
656                                 free(gids);
657                                 return r;
658                         }
659
660                         k++;
661                 }
662
663                 if (setgroups(k, gids) < 0) {
664                         free(gids);
665                         return -errno;
666                 }
667
668                 free(gids);
669         }
670
671         return 0;
672 }
673
674 static int enforce_user(const ExecContext *context, uid_t uid) {
675         int r;
676         assert(context);
677
678         /* Sets (but doesn't lookup) the uid and make sure we keep the
679          * capabilities while doing so. */
680
681         if (context->capabilities) {
682                 cap_t d;
683                 static const cap_value_t bits[] = {
684                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
685                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
686                 };
687
688                 /* First step: If we need to keep capabilities but
689                  * drop privileges we need to make sure we keep our
690                  * caps, whiel we drop privileges. */
691                 if (uid != 0) {
692                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
693
694                         if (prctl(PR_GET_SECUREBITS) != sb)
695                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
696                                         return -errno;
697                 }
698
699                 /* Second step: set the capabilities. This will reduce
700                  * the capabilities to the minimum we need. */
701
702                 if (!(d = cap_dup(context->capabilities)))
703                         return -errno;
704
705                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
706                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
707                         r = -errno;
708                         cap_free(d);
709                         return r;
710                 }
711
712                 if (cap_set_proc(d) < 0) {
713                         r = -errno;
714                         cap_free(d);
715                         return r;
716                 }
717
718                 cap_free(d);
719         }
720
721         /* Third step: actually set the uids */
722         if (setresuid(uid, uid, uid) < 0)
723                 return -errno;
724
725         /* At this point we should have all necessary capabilities but
726            are otherwise a normal user. However, the caps might got
727            corrupted due to the setresuid() so we need clean them up
728            later. This is done outside of this call. */
729
730         return 0;
731 }
732
733 #ifdef HAVE_PAM
734
735 static int null_conv(
736                 int num_msg,
737                 const struct pam_message **msg,
738                 struct pam_response **resp,
739                 void *appdata_ptr) {
740
741         /* We don't support conversations */
742
743         return PAM_CONV_ERR;
744 }
745
746 static int setup_pam(
747                 const char *name,
748                 const char *user,
749                 uid_t uid,
750                 const char *tty,
751                 char ***pam_env,
752                 int fds[], unsigned n_fds) {
753
754         static const struct pam_conv conv = {
755                 .conv = null_conv,
756                 .appdata_ptr = NULL
757         };
758
759         pam_handle_t *handle = NULL;
760         sigset_t ss, old_ss;
761         int pam_code = PAM_SUCCESS;
762         int err;
763         char **e = NULL;
764         bool close_session = false;
765         pid_t pam_pid = 0, parent_pid;
766
767         assert(name);
768         assert(user);
769         assert(pam_env);
770
771         /* We set up PAM in the parent process, then fork. The child
772          * will then stay around until killed via PR_GET_PDEATHSIG or
773          * systemd via the cgroup logic. It will then remove the PAM
774          * session again. The parent process will exec() the actual
775          * daemon. We do things this way to ensure that the main PID
776          * of the daemon is the one we initially fork()ed. */
777
778         if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
779                 handle = NULL;
780                 goto fail;
781         }
782
783         if (tty)
784                 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
785                         goto fail;
786
787         if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
788                 goto fail;
789
790         if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
791                 goto fail;
792
793         close_session = true;
794
795         if ((!(e = pam_getenvlist(handle)))) {
796                 pam_code = PAM_BUF_ERR;
797                 goto fail;
798         }
799
800         /* Block SIGTERM, so that we know that it won't get lost in
801          * the child */
802         if (sigemptyset(&ss) < 0 ||
803             sigaddset(&ss, SIGTERM) < 0 ||
804             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
805                 goto fail;
806
807         parent_pid = getpid();
808
809         if ((pam_pid = fork()) < 0)
810                 goto fail;
811
812         if (pam_pid == 0) {
813                 int sig;
814                 int r = EXIT_PAM;
815
816                 /* The child's job is to reset the PAM session on
817                  * termination */
818
819                 /* This string must fit in 10 chars (i.e. the length
820                  * of "/sbin/init"), to look pretty in /bin/ps */
821                 rename_process("(sd-pam)");
822
823                 /* Make sure we don't keep open the passed fds in this
824                 child. We assume that otherwise only those fds are
825                 open here that have been opened by PAM. */
826                 close_many(fds, n_fds);
827
828                 /* Drop privileges - we don't need any to pam_close_session
829                  * and this will make PR_SET_PDEATHSIG work in most cases.
830                  * If this fails, ignore the error - but expect sd-pam threads
831                  * to fail to exit normally */
832                 if (setresuid(uid, uid, uid) < 0)
833                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
834
835                 /* Wait until our parent died. This will only work if
836                  * the above setresuid() succeeds, otherwise the kernel
837                  * will not allow unprivileged parents kill their privileged
838                  * children this way. We rely on the control groups kill logic
839                  * to do the rest for us. */
840                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
841                         goto child_finish;
842
843                 /* Check if our parent process might already have
844                  * died? */
845                 if (getppid() == parent_pid) {
846                         for (;;) {
847                                 if (sigwait(&ss, &sig) < 0) {
848                                         if (errno == EINTR)
849                                                 continue;
850
851                                         goto child_finish;
852                                 }
853
854                                 assert(sig == SIGTERM);
855                                 break;
856                         }
857                 }
858
859                 /* If our parent died we'll end the session */
860                 if (getppid() != parent_pid)
861                         if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
862                                 goto child_finish;
863
864                 r = 0;
865
866         child_finish:
867                 pam_end(handle, pam_code | PAM_DATA_SILENT);
868                 _exit(r);
869         }
870
871         /* If the child was forked off successfully it will do all the
872          * cleanups, so forget about the handle here. */
873         handle = NULL;
874
875         /* Unblock SIGTERM again in the parent */
876         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
877                 goto fail;
878
879         /* We close the log explicitly here, since the PAM modules
880          * might have opened it, but we don't want this fd around. */
881         closelog();
882
883         *pam_env = e;
884         e = NULL;
885
886         return 0;
887
888 fail:
889         if (pam_code != PAM_SUCCESS)
890                 err = -EPERM;  /* PAM errors do not map to errno */
891         else
892                 err = -errno;
893
894         if (handle) {
895                 if (close_session)
896                         pam_code = pam_close_session(handle, PAM_DATA_SILENT);
897
898                 pam_end(handle, pam_code | PAM_DATA_SILENT);
899         }
900
901         strv_free(e);
902
903         closelog();
904
905         if (pam_pid > 1) {
906                 kill(pam_pid, SIGTERM);
907                 kill(pam_pid, SIGCONT);
908         }
909
910         return err;
911 }
912 #endif
913
/* Renames the process to "(<name>)", where <name> is made up of at
 * most the last 8 characters of the file name component of 'path'.
 * If that component is empty the name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base, *tail;
        size_t len, keep;

        /* The result has to fit in 10 chars (i.e. the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Keep the end of the name rather than the start: it is
         * usually the more interesting part, as the beginning might
         * just be "systemd-" */
        len = strlen(base);
        keep = len > 8 ? 8 : len;
        tail = base + (len - keep);

        process_name[0] = '(';
        memcpy(process_name + 1, tail, keep);
        process_name[keep + 1] = ')';
        process_name[keep + 2] = 0;

        rename_process(process_name);
}
944
945 static int apply_seccomp(uint32_t *syscall_filter) {
946         static const struct sock_filter header[] = {
947                 VALIDATE_ARCHITECTURE,
948                 EXAMINE_SYSCALL
949         };
950         static const struct sock_filter footer[] = {
951                 _KILL_PROCESS
952         };
953
954         int i;
955         unsigned n;
956         struct sock_filter *f;
957         struct sock_fprog prog;
958
959         assert(syscall_filter);
960
961         /* First: count the syscalls to check for */
962         for (i = 0, n = 0; i < syscall_max(); i++)
963                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
964                         n++;
965
966         /* Second: build the filter program from a header the syscall
967          * matches and the footer */
968         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
969         memcpy(f, header, sizeof(header));
970
971         for (i = 0, n = 0; i < syscall_max(); i++)
972                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
973                         struct sock_filter item[] = {
974                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
975                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
976                         };
977
978                         assert_cc(ELEMENTSOF(item) == 2);
979
980                         f[ELEMENTSOF(header) + 2*n]  = item[0];
981                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
982
983                         n++;
984                 }
985
986         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
987
988         /* Third: install the filter */
989         zero(prog);
990         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
991         prog.filter = f;
992         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
993                 return -errno;
994
995         return 0;
996 }
997
/* Forks off a child process and, in the child, applies the settings of
 * the execution context before finally calling execve() on
 * command->path.
 *
 * Parameters:
 *   command            the command to run; command->path is executed
 *                      and command->exec_status is updated in the parent
 *   argv               argument vector; if NULL, command->argv is used
 *   context            the execution settings to apply in the child
 *   fds, n_fds         file descriptors to hand to the child. If stdin,
 *                      stdout or stderr is configured as a socket,
 *                      exactly one fd must be passed, and it is used as
 *                      that socket instead
 *   environment        environment block merged into the final one
 *   apply_permissions  gates groups, PAM, resource limits, capabilities,
 *                      user switching, securebits, no-new-privileges and
 *                      the syscall filter
 *   apply_chroot       if true, chroot() into context->root_directory
 *                      (when set) and chdir(); otherwise only chdir()
 *                      into the concatenation of root and working
 *                      directory
 *   apply_tty_stdin    forwarded to setup_input()/setup_output()/
 *                      setup_error()
 *   confirm_spawn      ask for confirmation before continuing
 *   cgroup_bondings, cgroup_attributes, cgroup_suffix
 *                      control group setup, applied both before the fork
 *                      and for the new process
 *   unit_id            used for logging and forwarded to the output setup
 *   idle_pipe          optional pipe pair the child waits on (see below)
 *   ret                receives the PID of the forked child on success
 *
 * Returns 0 in the parent on success (with *ret set), or a negative
 * errno-style value if something fails before or at fork(). The child
 * never returns from this function: it either execs, or logs the
 * failed step and calls _exit() with an EXIT_* status. */
int exec_spawn(ExecCommand *command,
               char **argv,
               const ExecContext *context,
               int fds[], unsigned n_fds,
               char **environment,
               bool apply_permissions,
               bool apply_chroot,
               bool apply_tty_stdin,
               bool confirm_spawn,
               CGroupBonding *cgroup_bondings,
               CGroupAttribute *cgroup_attributes,
               const char *cgroup_suffix,
               const char *unit_id,
               int idle_pipe[2],
               pid_t *ret) {

        pid_t pid;
        int r;
        char *line;
        int socket_fd;
        char _cleanup_strv_free_ **files_env = NULL;

        assert(command);
        assert(context);
        assert(ret);
        assert(fds || n_fds <= 0);

        /* If any of the standard streams is to be connected to a
         * socket, the single fd passed in is that socket and is not
         * treated as a regular passed fd anymore. */
        if (context->std_input == EXEC_INPUT_SOCKET ||
            context->std_output == EXEC_OUTPUT_SOCKET ||
            context->std_error == EXEC_OUTPUT_SOCKET) {

                if (n_fds != 1)
                        return -EINVAL;

                socket_fd = fds[0];

                fds = NULL;
                n_fds = 0;
        } else
                socket_fd = -1;

        /* Load the environment files in the parent, so that a failure
         * is reported before we fork. */
        r = exec_context_load_environment(context, &files_env);
        if (r < 0) {
                log_struct_unit(LOG_ERR,
                           unit_id,
                           "MESSAGE=Failed to load environment files: %s", strerror(-r),
                           "ERRNO=%d", -r,
                           NULL);
                return r;
        }

        if (!argv)
                argv = command->argv;

        /* The command line string is only needed for the debug message */
        line = exec_command_line(argv);
        if (!line)
                return log_oom();

        log_struct_unit(LOG_DEBUG,
                   unit_id,
                   "MESSAGE=About to execute %s", line,
                   NULL);
        free(line);

        r = cgroup_bonding_realize_list(cgroup_bondings);
        if (r < 0)
                return r;

        /* The result of applying the attributes is not checked */
        cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);

        pid = fork();
        if (pid < 0)
                return -errno;

        if (pid == 0) {
                /* In the child, "err" carries the negative errno-style
                 * error and "r" the EXIT_* status that is passed to
                 * _exit() at fail_child. */
                int i, err;
                sigset_t ss;
                const char *username = NULL, *home = NULL;
                uid_t uid = (uid_t) -1;
                gid_t gid = (gid_t) -1;
                char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
                        **final_env = NULL, **final_argv = NULL;
                unsigned n_env = 0;
                bool set_access = false;

                /* child */

                rename_process_from_path(command->path);

                /* We reset exactly these signals, since they are the
                 * only ones we set to SIG_IGN in the main daemon. All
                 * others we leave untouched because we set them to
                 * SIG_DFL or a valid handler initially, both of which
                 * will be demoted to SIG_DFL. */
                default_signals(SIGNALS_CRASH_HANDLER,
                                SIGNALS_IGNORE, -1);

                if (context->ignore_sigpipe)
                        ignore_signals(SIGPIPE, -1);

                /* Start out with an empty signal mask */
                assert_se(sigemptyset(&ss) == 0);
                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                        err = -errno;
                        r = EXIT_SIGNAL_MASK;
                        goto fail_child;
                }

                /* If an idle pipe was passed, close our copy of the
                 * second end and wait for POLLHUP on the first end, for
                 * at most IDLE_TIMEOUT_USEC. The result of the wait is
                 * ignored. */
                if (idle_pipe) {
                        if (idle_pipe[1] >= 0)
                                close_nointr_nofail(idle_pipe[1]);
                        if (idle_pipe[0] >= 0) {
                                fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
                                close_nointr_nofail(idle_pipe[0]);
                        }
                }

                /* Close sockets very early to make sure we don't
                 * block init reexecution because it cannot bind its
                 * sockets */
                log_forget_fds();
                /* Either the socket or the passed fds are handed to
                 * close_all_fds() -- presumably as the set of fds to
                 * keep open. */
                err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
                                           socket_fd >= 0 ? 1 : n_fds);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (!context->same_pgrp)
                        if (setsid() < 0) {
                                err = -errno;
                                r = EXIT_SETSID;
                                goto fail_child;
                        }

                /* Check the socket and every passed fd against
                 * tcpwrap, if that is configured */
                if (context->tcpwrap_name) {
                        if (socket_fd >= 0)
                                if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }

                        for (i = 0; i < (int) n_fds; i++) {
                                if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
                                        err = -EACCES;
                                        r = EXIT_TCPWRAP;
                                        goto fail_child;
                                }
                        }
                }

                exec_context_tty_reset(context);

                /* A timeout or a failure to ask counts as a positive
                 * response. 's' skips the execution and is reported as
                 * EXIT_CONFIRM, 'n' exits with status 0 without
                 * logging a failure (r == 0 at fail_child). */
                if (confirm_spawn) {
                        char response;

                        err = ask_for_confirmation(&response, argv);
                        if (err == -ETIMEDOUT)
                                write_confirm_message("Confirmation question timed out, assuming positive response.\n");
                        else if (err < 0)
                                write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
                        else if (response == 's') {
                                write_confirm_message("Skipping execution.\n");
                                err = -ECANCELED;
                                r = EXIT_CONFIRM;
                                goto fail_child;
                        } else if (response == 'n') {
                                write_confirm_message("Failing execution.\n");
                                err = r = 0;
                                goto fail_child;
                        }
                }

                /* If a socket is connected to STDIN/STDOUT/STDERR, we
                 * must make sure to drop O_NONBLOCK */
                if (socket_fd >= 0)
                        fd_nonblock(socket_fd, false);

                err = setup_input(context, socket_fd, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDIN;
                        goto fail_child;
                }

                err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDOUT;
                        goto fail_child;
                }

                err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
                if (err < 0) {
                        r = EXIT_STDERR;
                        goto fail_child;
                }

                /* Join the control groups from within the child; the
                 * parent does the same for our PID after the fork (see
                 * the comment near the end of this function). */
                if (cgroup_bondings) {
                        err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (context->oom_score_adjust_set) {
                        char t[16];

                        snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
                        char_array_0(t);

                        /* NOTE(review): the return value of
                         * write_one_line_file() is only tested for
                         * failure and errno is reported -- confirm that
                         * the helper leaves errno set on all of its
                         * error paths. */
                        if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
                                err = -errno;
                                r = EXIT_OOM_ADJUST;
                                goto fail_child;
                        }
                }

                if (context->nice_set)
                        if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
                                err = -errno;
                                r = EXIT_NICE;
                                goto fail_child;
                        }

                if (context->cpu_sched_set) {
                        struct sched_param param;

                        zero(param);
                        param.sched_priority = context->cpu_sched_priority;

                        if (sched_setscheduler(0, context->cpu_sched_policy |
                                               (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
                                err = -errno;
                                r = EXIT_SETSCHEDULER;
                                goto fail_child;
                        }
                }

                if (context->cpuset)
                        if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
                                err = -errno;
                                r = EXIT_CPUAFFINITY;
                                goto fail_child;
                        }

                if (context->ioprio_set)
                        if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
                                err = -errno;
                                r = EXIT_IOPRIO;
                                goto fail_child;
                        }

                /* (nsec_t) -1 means that no timer slack was configured */
                if (context->timer_slack_nsec != (nsec_t) -1)
                        if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
                                err = -errno;
                                r = EXIT_TIMERSLACK;
                                goto fail_child;
                        }

                if (context->utmp_id)
                        utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);

                /* Resolve the configured user now; the actual switch
                 * happens much later in enforce_user(), and only if
                 * apply_permissions is set. */
                if (context->user) {
                        username = context->user;
                        err = get_user_creds(&username, &uid, &gid, &home, NULL);
                        if (err < 0) {
                                r = EXIT_USER;
                                goto fail_child;
                        }

                        if (is_terminal_input(context->std_input)) {
                                err = chown_terminal(STDIN_FILENO, uid);
                                if (err < 0) {
                                        r = EXIT_STDIN;
                                        goto fail_child;
                                }
                        }

                        if (cgroup_bondings && context->control_group_modify) {
                                err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
                                if (err >= 0)
                                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
                                if (err < 0) {
                                        r = EXIT_CGROUP;
                                        goto fail_child;
                                }

                                set_access = true;
                        }
                }

                /* If the access mode was not set above, still pass on
                 * the persistency setting, leaving mode, uid and gid
                 * as -1. */
                if (cgroup_bondings && !set_access && context->control_group_persistent >= 0)  {
                        err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
                        if (err < 0) {
                                r = EXIT_CGROUP;
                                goto fail_child;
                        }
                }

                if (apply_permissions) {
                        err = enforce_groups(context, username, gid);
                        if (err < 0) {
                                r = EXIT_GROUP;
                                goto fail_child;
                        }
                }

                umask(context->umask);

#ifdef HAVE_PAM
                /* PAM is only set up if a PAM service name is
                 * configured and a user name was resolved above */
                if (apply_permissions && context->pam_name && username) {
                        err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
                        if (err < 0) {
                                r = EXIT_PAM;
                                goto fail_child;
                        }
                }
#endif
                if (context->private_network) {
                        if (unshare(CLONE_NEWNET) < 0) {
                                err = -errno;
                                r = EXIT_NETWORK;
                                goto fail_child;
                        }

                        /* The result of the loopback setup is ignored */
                        loopback_setup();
                }

                /* Only set up a namespace if any of the settings
                 * that need one is in use */
                if (strv_length(context->read_write_dirs) > 0 ||
                    strv_length(context->read_only_dirs) > 0 ||
                    strv_length(context->inaccessible_dirs) > 0 ||
                    context->mount_flags != 0 ||
                    context->private_tmp) {
                        err = setup_namespace(context->read_write_dirs,
                                              context->read_only_dirs,
                                              context->inaccessible_dirs,
                                              context->private_tmp,
                                              context->mount_flags);
                        if (err < 0) {
                                r = EXIT_NAMESPACE;
                                goto fail_child;
                        }
                }

                if (apply_chroot) {
                        if (context->root_directory)
                                if (chroot(context->root_directory) < 0) {
                                        err = -errno;
                                        r = EXIT_CHROOT;
                                        goto fail_child;
                                }

                        if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                } else {
                        /* No chroot(): change into the working
                         * directory as seen below the root directory,
                         * i.e. "<root>/<working>". */
                        char _cleanup_free_ *d = NULL;

                        if (asprintf(&d, "%s/%s",
                                     context->root_directory ? context->root_directory : "",
                                     context->working_directory ? context->working_directory : "") < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                        if (chdir(d) < 0) {
                                err = -errno;
                                r = EXIT_CHDIR;
                                goto fail_child;
                        }
                }

                /* We repeat the fd closing here, to make sure that
                 * nothing is leaked from the PAM modules */
                err = close_all_fds(fds, n_fds);
                if (err >= 0)
                        err = shift_fds(fds, n_fds);
                if (err >= 0)
                        err = flags_fds(fds, n_fds, context->non_blocking);
                if (err < 0) {
                        r = EXIT_FDS;
                        goto fail_child;
                }

                if (apply_permissions) {

                        /* Apply every resource limit that is configured */
                        for (i = 0; i < RLIMIT_NLIMITS; i++) {
                                if (!context->rlimit[i])
                                        continue;

                                if (setrlimit_closest(i, context->rlimit[i]) < 0) {
                                        err = -errno;
                                        r = EXIT_LIMITS;
                                        goto fail_child;
                                }
                        }

                        if (context->capability_bounding_set_drop) {
                                err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
                                if (err < 0) {
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }
                        }

                        if (context->user) {
                                err = enforce_user(context, uid);
                                if (err < 0) {
                                        r = EXIT_USER;
                                        goto fail_child;
                                }
                        }

                        /* PR_GET_SECUREBITS is not privileged, while
                         * PR_SET_SECUREBITS is. So to suppress
                         * potential EPERMs we'll try not to call
                         * PR_SET_SECUREBITS unless necessary. */
                        if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
                                if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
                                        err = -errno;
                                        r = EXIT_SECUREBITS;
                                        goto fail_child;
                                }

                        if (context->capabilities)
                                if (cap_set_proc(context->capabilities) < 0) {
                                        err = -errno;
                                        r = EXIT_CAPABILITIES;
                                        goto fail_child;
                                }

                        if (context->no_new_privileges)
                                if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
                                        err = -errno;
                                        r = EXIT_NO_NEW_PRIVILEGES;
                                        goto fail_child;
                                }

                        if (context->syscall_filter) {
                                err = apply_seccomp(context->syscall_filter);
                                if (err < 0) {
                                        r = EXIT_SECCOMP;
                                        goto fail_child;
                                }
                        }
                }

                /* Seven slots: at most six entries are added below
                 * (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME, USER and the
                 * terminal entry), which leaves at least one zeroed
                 * slot at the end. */
                if (!(our_env = new0(char*, 7))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                if (n_fds > 0)
                        if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
                            asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (home)
                        if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                if (username)
                        if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
                            asprintf(our_env + n_env++, "USER=%s", username) < 0) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                /* If any standard stream is a terminal, add the string
                 * returned by default_term_for_tty() -- presumably a
                 * complete "TERM=..." assignment; confirm against the
                 * helper. */
                if (is_terminal_input(context->std_input) ||
                    context->std_output == EXEC_OUTPUT_TTY ||
                    context->std_error == EXEC_OUTPUT_TTY)
                        if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
                                err = -ENOMEM;
                                r = EXIT_MEMORY;
                                goto fail_child;
                        }

                assert(n_env <= 7);

                /* Combine the five environment blocks into one */
                if (!(final_env = strv_env_merge(
                                      5,
                                      environment,
                                      our_env,
                                      context->environment,
                                      files_env,
                                      pam_env,
                                      NULL))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                /* The argument vector is processed against the merged
                 * environment before that environment is cleaned */
                if (!(final_argv = replace_env_argv(argv, final_env))) {
                        err = -ENOMEM;
                        r = EXIT_MEMORY;
                        goto fail_child;
                }

                final_env = strv_env_clean(final_env);

                /* execve() only returns on failure */
                execve(command->path, final_argv, final_env);
                err = -errno;
                r = EXIT_EXEC;

        fail_child:
                /* Log the failed step, unless we get here with r == 0
                 * (the user declined the confirmation), and terminate
                 * the child with r as its exit status. */
                if (r != 0) {
                        log_open();
                        log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
                                   "EXECUTABLE=%s", command->path,
                                   "MESSAGE=Failed at step %s spawning %s: %s",
                                          exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
                                          command->path, strerror(-err),
                                   "ERRNO=%d", -err,
                                   NULL);
                        log_close();
                }

                _exit(r);
        }

        /* parent */

        log_struct_unit(LOG_DEBUG,
                   unit_id,
                   "MESSAGE=Forked %s as %lu",
                          command->path, (unsigned long) pid,
                   NULL);

        /* We add the new process to the cgroup both in the child (so
         * that we can be sure that no user code is ever executed
         * outside of the cgroup) and in the parent (so that we can be
         * sure that when we kill the cgroup the process will be
         * killed too). */
        if (cgroup_bondings)
                cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);

        exec_status_start(&command->exec_status, pid);

        *ret = pid;
        return 0;
}
1548
1549 void exec_context_init(ExecContext *c) {
1550         assert(c);
1551
1552         c->umask = 0022;
1553         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1554         c->cpu_sched_policy = SCHED_OTHER;
1555         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1556         c->syslog_level_prefix = true;
1557         c->control_group_persistent = -1;
1558         c->ignore_sigpipe = true;
1559         c->timer_slack_nsec = (nsec_t) -1;
1560 }
1561
1562 void exec_context_done(ExecContext *c) {
1563         unsigned l;
1564
1565         assert(c);
1566
1567         strv_free(c->environment);
1568         c->environment = NULL;
1569
1570         strv_free(c->environment_files);
1571         c->environment_files = NULL;
1572
1573         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1574                 free(c->rlimit[l]);
1575                 c->rlimit[l] = NULL;
1576         }
1577
1578         free(c->working_directory);
1579         c->working_directory = NULL;
1580         free(c->root_directory);
1581         c->root_directory = NULL;
1582
1583         free(c->tty_path);
1584         c->tty_path = NULL;
1585
1586         free(c->tcpwrap_name);
1587         c->tcpwrap_name = NULL;
1588
1589         free(c->syslog_identifier);
1590         c->syslog_identifier = NULL;
1591
1592         free(c->user);
1593         c->user = NULL;
1594
1595         free(c->group);
1596         c->group = NULL;
1597
1598         strv_free(c->supplementary_groups);
1599         c->supplementary_groups = NULL;
1600
1601         free(c->pam_name);
1602         c->pam_name = NULL;
1603
1604         if (c->capabilities) {
1605                 cap_free(c->capabilities);
1606                 c->capabilities = NULL;
1607         }
1608
1609         strv_free(c->read_only_dirs);
1610         c->read_only_dirs = NULL;
1611
1612         strv_free(c->read_write_dirs);
1613         c->read_write_dirs = NULL;
1614
1615         strv_free(c->inaccessible_dirs);
1616         c->inaccessible_dirs = NULL;
1617
1618         if (c->cpuset)
1619                 CPU_FREE(c->cpuset);
1620
1621         free(c->utmp_id);
1622         c->utmp_id = NULL;
1623
1624         free(c->syscall_filter);
1625         c->syscall_filter = NULL;
1626 }
1627
1628 void exec_command_done(ExecCommand *c) {
1629         assert(c);
1630
1631         free(c->path);
1632         c->path = NULL;
1633
1634         strv_free(c->argv);
1635         c->argv = NULL;
1636 }
1637
1638 void exec_command_done_array(ExecCommand *c, unsigned n) {
1639         unsigned i;
1640
1641         for (i = 0; i < n; i++)
1642                 exec_command_done(c+i);
1643 }
1644
1645 void exec_command_free_list(ExecCommand *c) {
1646         ExecCommand *i;
1647
1648         while ((i = c)) {
1649                 LIST_REMOVE(ExecCommand, command, c, i);
1650                 exec_command_done(i);
1651                 free(i);
1652         }
1653 }
1654
1655 void exec_command_free_array(ExecCommand **c, unsigned n) {
1656         unsigned i;
1657
1658         for (i = 0; i < n; i++) {
1659                 exec_command_free_list(c[i]);
1660                 c[i] = NULL;
1661         }
1662 }
1663
1664 int exec_context_load_environment(const ExecContext *c, char ***l) {
1665         char **i, **r = NULL;
1666
1667         assert(c);
1668         assert(l);
1669
1670         STRV_FOREACH(i, c->environment_files) {
1671                 char *fn;
1672                 int k;
1673                 bool ignore = false;
1674                 char **p;
1675                 glob_t pglob;
1676                 int count, n;
1677
1678                 fn = *i;
1679
1680                 if (fn[0] == '-') {
1681                         ignore = true;
1682                         fn ++;
1683                 }
1684
1685                 if (!path_is_absolute(fn)) {
1686
1687                         if (ignore)
1688                                 continue;
1689
1690                         strv_free(r);
1691                         return -EINVAL;
1692                 }
1693
1694                 /* Filename supports globbing, take all matching files */
1695                 zero(pglob);
1696                 errno = 0;
1697                 if (glob(fn, 0, NULL, &pglob) != 0) {
1698                         globfree(&pglob);
1699                         if (ignore)
1700                                 continue;
1701
1702                         strv_free(r);
1703                         return errno ? -errno : -EINVAL;
1704                 }
1705                 count = pglob.gl_pathc;
1706                 if (count == 0) {
1707                         globfree(&pglob);
1708                         if (ignore)
1709                                 continue;
1710
1711                         strv_free(r);
1712                         return -EINVAL;
1713                 }
1714                 for (n = 0; n < count; n++) {
1715                         k = load_env_file(pglob.gl_pathv[n], &p);
1716                         if (k < 0) {
1717                                 if (ignore)
1718                                         continue;
1719
1720                                 strv_free(r);
1721                                 globfree(&pglob);
1722                                 return k;
1723                          }
1724
1725                         if (r == NULL)
1726                                 r = p;
1727                         else {
1728                                 char **m;
1729
1730                                 m = strv_env_merge(2, r, p);
1731                                 strv_free(r);
1732                                 strv_free(p);
1733
1734                                 if (!m) {
1735                                         globfree(&pglob);
1736                                         return -ENOMEM;
1737                                 }
1738
1739                                 r = m;
1740                         }
1741                 }
1742                 globfree(&pglob);
1743         }
1744
1745         *l = r;
1746
1747         return 0;
1748 }
1749
1750 static void strv_fprintf(FILE *f, char **l) {
1751         char **g;
1752
1753         assert(f);
1754
1755         STRV_FOREACH(g, l)
1756                 fprintf(f, " %s", *g);
1757 }
1758
1759 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1760         char ** e;
1761         unsigned i;
1762
1763         assert(c);
1764         assert(f);
1765
1766         if (!prefix)
1767                 prefix = "";
1768
1769         fprintf(f,
1770                 "%sUMask: %04o\n"
1771                 "%sWorkingDirectory: %s\n"
1772                 "%sRootDirectory: %s\n"
1773                 "%sNonBlocking: %s\n"
1774                 "%sPrivateTmp: %s\n"
1775                 "%sControlGroupModify: %s\n"
1776                 "%sControlGroupPersistent: %s\n"
1777                 "%sPrivateNetwork: %s\n"
1778                 "%sIgnoreSIGPIPE: %s\n",
1779                 prefix, c->umask,
1780                 prefix, c->working_directory ? c->working_directory : "/",
1781                 prefix, c->root_directory ? c->root_directory : "/",
1782                 prefix, yes_no(c->non_blocking),
1783                 prefix, yes_no(c->private_tmp),
1784                 prefix, yes_no(c->control_group_modify),
1785                 prefix, yes_no(c->control_group_persistent),
1786                 prefix, yes_no(c->private_network),
1787                 prefix, yes_no(c->ignore_sigpipe));
1788
1789         STRV_FOREACH(e, c->environment)
1790                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1791
1792         STRV_FOREACH(e, c->environment_files)
1793                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1794
1795         if (c->tcpwrap_name)
1796                 fprintf(f,
1797                         "%sTCPWrapName: %s\n",
1798                         prefix, c->tcpwrap_name);
1799
1800         if (c->nice_set)
1801                 fprintf(f,
1802                         "%sNice: %i\n",
1803                         prefix, c->nice);
1804
1805         if (c->oom_score_adjust_set)
1806                 fprintf(f,
1807                         "%sOOMScoreAdjust: %i\n",
1808                         prefix, c->oom_score_adjust);
1809
1810         for (i = 0; i < RLIM_NLIMITS; i++)
1811                 if (c->rlimit[i])
1812                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1813
1814         if (c->ioprio_set) {
1815                 char *class_str;
1816                 int r;
1817
1818                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1819                 if (r < 0)
1820                         class_str = NULL;
1821                 fprintf(f,
1822                         "%sIOSchedulingClass: %s\n"
1823                         "%sIOPriority: %i\n",
1824                         prefix, strna(class_str),
1825                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1826                 free(class_str);
1827         }
1828
1829         if (c->cpu_sched_set) {
1830                 char *policy_str;
1831                 int r;
1832
1833                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1834                 if (r < 0)
1835                         policy_str = NULL;
1836                 fprintf(f,
1837                         "%sCPUSchedulingPolicy: %s\n"
1838                         "%sCPUSchedulingPriority: %i\n"
1839                         "%sCPUSchedulingResetOnFork: %s\n",
1840                         prefix, strna(policy_str),
1841                         prefix, c->cpu_sched_priority,
1842                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1843                 free(policy_str);
1844         }
1845
1846         if (c->cpuset) {
1847                 fprintf(f, "%sCPUAffinity:", prefix);
1848                 for (i = 0; i < c->cpuset_ncpus; i++)
1849                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1850                                 fprintf(f, " %i", i);
1851                 fputs("\n", f);
1852         }
1853
1854         if (c->timer_slack_nsec != (nsec_t) -1)
1855                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1856
1857         fprintf(f,
1858                 "%sStandardInput: %s\n"
1859                 "%sStandardOutput: %s\n"
1860                 "%sStandardError: %s\n",
1861                 prefix, exec_input_to_string(c->std_input),
1862                 prefix, exec_output_to_string(c->std_output),
1863                 prefix, exec_output_to_string(c->std_error));
1864
1865         if (c->tty_path)
1866                 fprintf(f,
1867                         "%sTTYPath: %s\n"
1868                         "%sTTYReset: %s\n"
1869                         "%sTTYVHangup: %s\n"
1870                         "%sTTYVTDisallocate: %s\n",
1871                         prefix, c->tty_path,
1872                         prefix, yes_no(c->tty_reset),
1873                         prefix, yes_no(c->tty_vhangup),
1874                         prefix, yes_no(c->tty_vt_disallocate));
1875
1876         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1877             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1878             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1879             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1880                 char *fac_str, *lvl_str;
1881                 int r;
1882
1883                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1884                 if (r < 0)
1885                         fac_str = NULL;
1886
1887                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1888                 if (r < 0)
1889                         lvl_str = NULL;
1890
1891                 fprintf(f,
1892                         "%sSyslogFacility: %s\n"
1893                         "%sSyslogLevel: %s\n",
1894                         prefix, strna(fac_str),
1895                         prefix, strna(lvl_str));
1896                 free(lvl_str);
1897                 free(fac_str);
1898         }
1899
1900         if (c->capabilities) {
1901                 char *t;
1902                 if ((t = cap_to_text(c->capabilities, NULL))) {
1903                         fprintf(f, "%sCapabilities: %s\n",
1904                                 prefix, t);
1905                         cap_free(t);
1906                 }
1907         }
1908
1909         if (c->secure_bits)
1910                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1911                         prefix,
1912                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1913                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1914                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1915                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1916                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1917                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1918
1919         if (c->capability_bounding_set_drop) {
1920                 unsigned long l;
1921                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1922
1923                 for (l = 0; l <= cap_last_cap(); l++)
1924                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1925                                 char *t;
1926
1927                                 if ((t = cap_to_name(l))) {
1928                                         fprintf(f, " %s", t);
1929                                         cap_free(t);
1930                                 }
1931                         }
1932
1933                 fputs("\n", f);
1934         }
1935
1936         if (c->user)
1937                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1938         if (c->group)
1939                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1940
1941         if (strv_length(c->supplementary_groups) > 0) {
1942                 fprintf(f, "%sSupplementaryGroups:", prefix);
1943                 strv_fprintf(f, c->supplementary_groups);
1944                 fputs("\n", f);
1945         }
1946
1947         if (c->pam_name)
1948                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1949
1950         if (strv_length(c->read_write_dirs) > 0) {
1951                 fprintf(f, "%sReadWriteDirs:", prefix);
1952                 strv_fprintf(f, c->read_write_dirs);
1953                 fputs("\n", f);
1954         }
1955
1956         if (strv_length(c->read_only_dirs) > 0) {
1957                 fprintf(f, "%sReadOnlyDirs:", prefix);
1958                 strv_fprintf(f, c->read_only_dirs);
1959                 fputs("\n", f);
1960         }
1961
1962         if (strv_length(c->inaccessible_dirs) > 0) {
1963                 fprintf(f, "%sInaccessibleDirs:", prefix);
1964                 strv_fprintf(f, c->inaccessible_dirs);
1965                 fputs("\n", f);
1966         }
1967
1968         if (c->utmp_id)
1969                 fprintf(f,
1970                         "%sUtmpIdentifier: %s\n",
1971                         prefix, c->utmp_id);
1972 }
1973
1974 void exec_status_start(ExecStatus *s, pid_t pid) {
1975         assert(s);
1976
1977         zero(*s);
1978         s->pid = pid;
1979         dual_timestamp_get(&s->start_timestamp);
1980 }
1981
1982 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1983         assert(s);
1984
1985         if (s->pid && s->pid != pid)
1986                 zero(*s);
1987
1988         s->pid = pid;
1989         dual_timestamp_get(&s->exit_timestamp);
1990
1991         s->code = code;
1992         s->status = status;
1993
1994         if (context) {
1995                 if (context->utmp_id)
1996                         utmp_put_dead_process(context->utmp_id, pid, code, status);
1997
1998                 exec_context_tty_reset(context);
1999         }
2000 }
2001
2002 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2003         char buf[FORMAT_TIMESTAMP_MAX];
2004
2005         assert(s);
2006         assert(f);
2007
2008         if (!prefix)
2009                 prefix = "";
2010
2011         if (s->pid <= 0)
2012                 return;
2013
2014         fprintf(f,
2015                 "%sPID: %lu\n",
2016                 prefix, (unsigned long) s->pid);
2017
2018         if (s->start_timestamp.realtime > 0)
2019                 fprintf(f,
2020                         "%sStart Timestamp: %s\n",
2021                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2022
2023         if (s->exit_timestamp.realtime > 0)
2024                 fprintf(f,
2025                         "%sExit Timestamp: %s\n"
2026                         "%sExit Code: %s\n"
2027                         "%sExit Status: %i\n",
2028                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2029                         prefix, sigchld_code_to_string(s->code),
2030                         prefix, s->status);
2031 }
2032
2033 char *exec_command_line(char **argv) {
2034         size_t k;
2035         char *n, *p, **a;
2036         bool first = true;
2037
2038         assert(argv);
2039
2040         k = 1;
2041         STRV_FOREACH(a, argv)
2042                 k += strlen(*a)+3;
2043
2044         if (!(n = new(char, k)))
2045                 return NULL;
2046
2047         p = n;
2048         STRV_FOREACH(a, argv) {
2049
2050                 if (!first)
2051                         *(p++) = ' ';
2052                 else
2053                         first = false;
2054
2055                 if (strpbrk(*a, WHITESPACE)) {
2056                         *(p++) = '\'';
2057                         p = stpcpy(p, *a);
2058                         *(p++) = '\'';
2059                 } else
2060                         p = stpcpy(p, *a);
2061
2062         }
2063
2064         *p = 0;
2065
2066         /* FIXME: this doesn't really handle arguments that have
2067          * spaces and ticks in them */
2068
2069         return n;
2070 }
2071
2072 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2073         char *p2;
2074         const char *prefix2;
2075
2076         char *cmd;
2077
2078         assert(c);
2079         assert(f);
2080
2081         if (!prefix)
2082                 prefix = "";
2083         p2 = strappend(prefix, "\t");
2084         prefix2 = p2 ? p2 : prefix;
2085
2086         cmd = exec_command_line(c->argv);
2087
2088         fprintf(f,
2089                 "%sCommand Line: %s\n",
2090                 prefix, cmd ? cmd : strerror(ENOMEM));
2091
2092         free(cmd);
2093
2094         exec_status_dump(&c->exec_status, f, prefix2);
2095
2096         free(p2);
2097 }
2098
2099 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2100         assert(f);
2101
2102         if (!prefix)
2103                 prefix = "";
2104
2105         LIST_FOREACH(command, c, c)
2106                 exec_command_dump(c, f, prefix);
2107 }
2108
2109 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2110         ExecCommand *end;
2111
2112         assert(l);
2113         assert(e);
2114
2115         if (*l) {
2116                 /* It's kind of important, that we keep the order here */
2117                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2118                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
2119         } else
2120               *l = e;
2121 }
2122
2123 int exec_command_set(ExecCommand *c, const char *path, ...) {
2124         va_list ap;
2125         char **l, *p;
2126
2127         assert(c);
2128         assert(path);
2129
2130         va_start(ap, path);
2131         l = strv_new_ap(path, ap);
2132         va_end(ap);
2133
2134         if (!l)
2135                 return -ENOMEM;
2136
2137         if (!(p = strdup(path))) {
2138                 strv_free(l);
2139                 return -ENOMEM;
2140         }
2141
2142         free(c->path);
2143         c->path = p;
2144
2145         strv_free(c->argv);
2146         c->argv = l;
2147
2148         return 0;
2149 }
2150
2151 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2152         [EXEC_INPUT_NULL] = "null",
2153         [EXEC_INPUT_TTY] = "tty",
2154         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2155         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2156         [EXEC_INPUT_SOCKET] = "socket"
2157 };
2158
2159 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2160
2161 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2162         [EXEC_OUTPUT_INHERIT] = "inherit",
2163         [EXEC_OUTPUT_NULL] = "null",
2164         [EXEC_OUTPUT_TTY] = "tty",
2165         [EXEC_OUTPUT_SYSLOG] = "syslog",
2166         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2167         [EXEC_OUTPUT_KMSG] = "kmsg",
2168         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2169         [EXEC_OUTPUT_JOURNAL] = "journal",
2170         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2171         [EXEC_OUTPUT_SOCKET] = "socket"
2172 };
2173
2174 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);