chiark / gitweb /
core: initialize variable
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43 #include <libgen.h>
44
45 #ifdef HAVE_PAM
46 #include <security/pam_appl.h>
47 #endif
48
49 #include "execute.h"
50 #include "strv.h"
51 #include "macro.h"
52 #include "capability.h"
53 #include "util.h"
54 #include "log.h"
55 #include "sd-messages.h"
56 #include "ioprio.h"
57 #include "securebits.h"
58 #include "namespace.h"
59 #include "tcpwrap.h"
60 #include "exit-status.h"
61 #include "missing.h"
62 #include "utmp-wtmp.h"
63 #include "def.h"
64 #include "path-util.h"
65 #include "syscall-list.h"
66 #include "env-util.h"
67 #include "fileio.h"
68 #include "unit.h"
69 #include "async.h"
70
71 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
73
74 /* This assumes there is a 'tty' group */
75 #define TTY_MODE 0620
76
/* Moves the passed file descriptors so that fds[i] ends up as fd number
 * i+3, i.e. directly after stdin/stdout/stderr.
 *
 * Note that the fds array is modified in place: every entry that had to
 * be moved is replaced by the new descriptor number and the old
 * descriptor is closed.
 *
 * Returns 0 on success, or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still occupied, so we got
                         * a higher number. Remember the first such index
                         * and make another pass starting from there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
120
/* Applies the requested O_NONBLOCK state to each of the passed fds and
 * clears FD_CLOEXEC on them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be passed on to our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
147
148 _pure_ static const char *tty_path(const ExecContext *context) {
149         assert(context);
150
151         if (context->tty_path)
152                 return context->tty_path;
153
154         return "/dev/console";
155 }
156
157 static void exec_context_tty_reset(const ExecContext *context) {
158         assert(context);
159
160         if (context->tty_vhangup)
161                 terminal_vhangup(tty_path(context));
162
163         if (context->tty_reset)
164                 reset_terminal(tty_path(context));
165
166         if (context->tty_vt_disallocate && context->tty_path)
167                 vt_disallocate(context->tty_path);
168 }
169
170 static bool is_terminal_output(ExecOutput o) {
171         return
172                 o == EXEC_OUTPUT_TTY ||
173                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
174                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
175                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
176 }
177
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and makes
 * it available as file descriptor nfd.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the number we wanted */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(null_fd);

        return r;
}
195
196 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
197         int fd, r;
198         union sockaddr_union sa = {
199                 .un.sun_family = AF_UNIX,
200                 .un.sun_path = "/run/systemd/journal/stdout",
201         };
202
203         assert(context);
204         assert(output < _EXEC_OUTPUT_MAX);
205         assert(ident);
206         assert(nfd >= 0);
207
208         fd = socket(AF_UNIX, SOCK_STREAM, 0);
209         if (fd < 0)
210                 return -errno;
211
212         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
213         if (r < 0) {
214                 close_nointr_nofail(fd);
215                 return -errno;
216         }
217
218         if (shutdown(fd, SHUT_RD) < 0) {
219                 close_nointr_nofail(fd);
220                 return -errno;
221         }
222
223         dprintf(fd,
224                 "%s\n"
225                 "%s\n"
226                 "%i\n"
227                 "%i\n"
228                 "%i\n"
229                 "%i\n"
230                 "%i\n",
231                 context->syslog_identifier ? context->syslog_identifier : ident,
232                 unit_id,
233                 context->syslog_priority,
234                 !!context->syslog_level_prefix,
235                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
236                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
237                 is_terminal_output(output));
238
239         if (fd != nfd) {
240                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
241                 close_nointr_nofail(fd);
242         } else
243                 r = nfd;
244
245         return r;
246 }
/* Opens the terminal at path with the given open mode (plus O_NOCTTY)
 * and makes it available as file descriptor nfd.
 *
 * Returns nfd on success, or a negative error: either the one returned
 * by open_terminal() or the errno of a failed dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, r;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already on the right descriptor number */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                r = -errno;
        else
                r = nfd;

        close_nointr_nofail(tty_fd);

        return r;
}
264
265 static bool is_terminal_input(ExecInput i) {
266         return
267                 i == EXEC_INPUT_TTY ||
268                 i == EXEC_INPUT_TTY_FORCE ||
269                 i == EXEC_INPUT_TTY_FAIL;
270 }
271
272 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
273
274         if (is_terminal_input(std_input) && !apply_tty_stdin)
275                 return EXEC_INPUT_NULL;
276
277         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
278                 return EXEC_INPUT_NULL;
279
280         return std_input;
281 }
282
283 static int fixup_output(ExecOutput std_output, int socket_fd) {
284
285         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
286                 return EXEC_OUTPUT_INHERIT;
287
288         return std_output;
289 }
290
291 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
292         ExecInput i;
293
294         assert(context);
295
296         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
297
298         switch (i) {
299
300         case EXEC_INPUT_NULL:
301                 return open_null_as(O_RDONLY, STDIN_FILENO);
302
303         case EXEC_INPUT_TTY:
304         case EXEC_INPUT_TTY_FORCE:
305         case EXEC_INPUT_TTY_FAIL: {
306                 int fd, r;
307
308                 fd = acquire_terminal(tty_path(context),
309                                       i == EXEC_INPUT_TTY_FAIL,
310                                       i == EXEC_INPUT_TTY_FORCE,
311                                       false,
312                                       (usec_t) -1);
313                 if (fd < 0)
314                         return fd;
315
316                 if (fd != STDIN_FILENO) {
317                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
318                         close_nointr_nofail(fd);
319                 } else
320                         r = STDIN_FILENO;
321
322                 return r;
323         }
324
325         case EXEC_INPUT_SOCKET:
326                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
327
328         default:
329                 assert_not_reached("Unknown input type");
330         }
331 }
332
333 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
334         ExecOutput o;
335         ExecInput i;
336         int r;
337
338         assert(context);
339         assert(ident);
340
341         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
342         o = fixup_output(context->std_output, socket_fd);
343
344         if (fileno == STDERR_FILENO) {
345                 ExecOutput e;
346                 e = fixup_output(context->std_error, socket_fd);
347
348                 /* This expects the input and output are already set up */
349
350                 /* Don't change the stderr file descriptor if we inherit all
351                  * the way and are not on a tty */
352                 if (e == EXEC_OUTPUT_INHERIT &&
353                     o == EXEC_OUTPUT_INHERIT &&
354                     i == EXEC_INPUT_NULL &&
355                     !is_terminal_input(context->std_input) &&
356                     getppid () != 1)
357                         return fileno;
358
359                 /* Duplicate from stdout if possible */
360                 if (e == o || e == EXEC_OUTPUT_INHERIT)
361                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
362
363                 o = e;
364
365         } else if (o == EXEC_OUTPUT_INHERIT) {
366                 /* If input got downgraded, inherit the original value */
367                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
368                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
369
370                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
371                 if (i != EXEC_INPUT_NULL)
372                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
373
374                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
375                 if (getppid() != 1)
376                         return fileno;
377
378                 /* We need to open /dev/null here anew, to get the right access mode. */
379                 return open_null_as(O_WRONLY, fileno);
380         }
381
382         switch (o) {
383
384         case EXEC_OUTPUT_NULL:
385                 return open_null_as(O_WRONLY, fileno);
386
387         case EXEC_OUTPUT_TTY:
388                 if (is_terminal_input(i))
389                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
390
391                 /* We don't reset the terminal if this is just about output */
392                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394         case EXEC_OUTPUT_SYSLOG:
395         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
396         case EXEC_OUTPUT_KMSG:
397         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
398         case EXEC_OUTPUT_JOURNAL:
399         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
400                 r = connect_logger_as(context, o, ident, unit_id, fileno);
401                 if (r < 0) {
402                         log_struct_unit(LOG_CRIT, unit_id,
403                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
404                                 fileno == STDOUT_FILENO ? "out" : "err",
405                                 unit_id, strerror(-r),
406                                 "ERRNO=%d", -r,
407                                 NULL);
408                         r = open_null_as(O_WRONLY, fileno);
409                 }
410                 return r;
411
412         case EXEC_OUTPUT_SOCKET:
413                 assert(socket_fd >= 0);
414                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
415
416         default:
417                 assert_not_reached("Unknown error type");
418         }
419 }
420
421 static int chown_terminal(int fd, uid_t uid) {
422         struct stat st;
423
424         assert(fd >= 0);
425
426         /* This might fail. What matters are the results. */
427         (void) fchown(fd, uid, -1);
428         (void) fchmod(fd, TTY_MODE);
429
430         if (fstat(fd, &st) < 0)
431                 return -errno;
432
433         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
434                 return -EPERM;
435
436         return 0;
437 }
438
439 static int setup_confirm_stdio(int *_saved_stdin,
440                                int *_saved_stdout) {
441         int fd = -1, saved_stdin, saved_stdout = -1, r;
442
443         assert(_saved_stdin);
444         assert(_saved_stdout);
445
446         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
447         if (saved_stdin < 0)
448                 return -errno;
449
450         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
451         if (saved_stdout < 0) {
452                 r = errno;
453                 goto fail;
454         }
455
456         fd = acquire_terminal(
457                         "/dev/console",
458                         false,
459                         false,
460                         false,
461                         DEFAULT_CONFIRM_USEC);
462         if (fd < 0) {
463                 r = fd;
464                 goto fail;
465         }
466
467         r = chown_terminal(fd, getuid());
468         if (r < 0)
469                 goto fail;
470
471         if (dup2(fd, STDIN_FILENO) < 0) {
472                 r = -errno;
473                 goto fail;
474         }
475
476         if (dup2(fd, STDOUT_FILENO) < 0) {
477                 r = -errno;
478                 goto fail;
479         }
480
481         if (fd >= 2)
482                 close_nointr_nofail(fd);
483
484         *_saved_stdin = saved_stdin;
485         *_saved_stdout = saved_stdout;
486
487         return 0;
488
489 fail:
490         if (saved_stdout >= 0)
491                 close_nointr_nofail(saved_stdout);
492
493         if (saved_stdin >= 0)
494                 close_nointr_nofail(saved_stdin);
495
496         if (fd >= 0)
497                 close_nointr_nofail(fd);
498
499         return r;
500 }
501
502 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
503         int fd;
504         va_list ap;
505
506         assert(format);
507
508         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
509         if (fd < 0)
510                 return fd;
511
512         va_start(ap, format);
513         vdprintf(fd, format, ap);
514         va_end(ap);
515
516         close_nointr_nofail(fd);
517
518         return 0;
519 }
520
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back in place as stdin/stdout and closes the saved
 * copies. Descriptors that are negative are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int in_fd, out_fd, r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        in_fd = *saved_stdin;
        out_fd = *saved_stdout;

        release_terminal();

        /* First put both back in place ... */
        if (in_fd >= 0 && dup2(in_fd, STDIN_FILENO) < 0)
                r = -errno;

        if (out_fd >= 0 && dup2(out_fd, STDOUT_FILENO) < 0)
                r = -errno;

        /* ... then get rid of the saved copies */
        if (in_fd >= 0)
                close_nointr_nofail(in_fd);

        if (out_fd >= 0)
                close_nointr_nofail(out_fd);

        return r;
}
547
/* Asks on the console whether the command line in argv shall be
 * executed. The answer (one of the characters in "yns") is stored in
 * *response by ask().
 *
 * Returns the result of ask() on success, or a negative error if the
 * console could not be set up or memory is short. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdin/stdout point to the console at this point, so
                 * put the original descriptors back before bailing
                 * out, otherwise they would be leaked. */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
567
568 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
569         bool keep_groups = false;
570         int r;
571
572         assert(context);
573
574         /* Lookup and set GID and supplementary group list. Here too
575          * we avoid NSS lookups for gid=0. */
576
577         if (context->group || username) {
578
579                 if (context->group) {
580                         const char *g = context->group;
581
582                         if ((r = get_group_creds(&g, &gid)) < 0)
583                                 return r;
584                 }
585
586                 /* First step, initialize groups from /etc/groups */
587                 if (username && gid != 0) {
588                         if (initgroups(username, gid) < 0)
589                                 return -errno;
590
591                         keep_groups = true;
592                 }
593
594                 /* Second step, set our gids */
595                 if (setresgid(gid, gid, gid) < 0)
596                         return -errno;
597         }
598
599         if (context->supplementary_groups) {
600                 int ngroups_max, k;
601                 gid_t *gids;
602                 char **i;
603
604                 /* Final step, initialize any manually set supplementary groups */
605                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
606
607                 if (!(gids = new(gid_t, ngroups_max)))
608                         return -ENOMEM;
609
610                 if (keep_groups) {
611                         if ((k = getgroups(ngroups_max, gids)) < 0) {
612                                 free(gids);
613                                 return -errno;
614                         }
615                 } else
616                         k = 0;
617
618                 STRV_FOREACH(i, context->supplementary_groups) {
619                         const char *g;
620
621                         if (k >= ngroups_max) {
622                                 free(gids);
623                                 return -E2BIG;
624                         }
625
626                         g = *i;
627                         r = get_group_creds(&g, gids+k);
628                         if (r < 0) {
629                                 free(gids);
630                                 return r;
631                         }
632
633                         k++;
634                 }
635
636                 if (setgroups(k, gids) < 0) {
637                         free(gids);
638                         return -errno;
639                 }
640
641                 free(gids);
642         }
643
644         return 0;
645 }
646
647 static int enforce_user(const ExecContext *context, uid_t uid) {
648         int r;
649         assert(context);
650
651         /* Sets (but doesn't lookup) the uid and make sure we keep the
652          * capabilities while doing so. */
653
654         if (context->capabilities) {
655                 cap_t d;
656                 static const cap_value_t bits[] = {
657                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
658                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
659                 };
660
661                 /* First step: If we need to keep capabilities but
662                  * drop privileges we need to make sure we keep our
663                  * caps, while we drop privileges. */
664                 if (uid != 0) {
665                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
666
667                         if (prctl(PR_GET_SECUREBITS) != sb)
668                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
669                                         return -errno;
670                 }
671
672                 /* Second step: set the capabilities. This will reduce
673                  * the capabilities to the minimum we need. */
674
675                 if (!(d = cap_dup(context->capabilities)))
676                         return -errno;
677
678                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
679                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
680                         r = -errno;
681                         cap_free(d);
682                         return r;
683                 }
684
685                 if (cap_set_proc(d) < 0) {
686                         r = -errno;
687                         cap_free(d);
688                         return r;
689                 }
690
691                 cap_free(d);
692         }
693
694         /* Third step: actually set the uids */
695         if (setresuid(uid, uid, uid) < 0)
696                 return -errno;
697
698         /* At this point we should have all necessary capabilities but
699            are otherwise a normal user. However, the caps might got
700            corrupted due to the setresuid() so we need clean them up
701            later. This is done outside of this call. */
702
703         return 0;
704 }
705
706 #ifdef HAVE_PAM
707
708 static int null_conv(
709                 int num_msg,
710                 const struct pam_message **msg,
711                 struct pam_response **resp,
712                 void *appdata_ptr) {
713
714         /* We don't support conversations */
715
716         return PAM_CONV_ERR;
717 }
718
719 static int setup_pam(
720                 const char *name,
721                 const char *user,
722                 uid_t uid,
723                 const char *tty,
724                 char ***pam_env,
725                 int fds[], unsigned n_fds) {
726
727         static const struct pam_conv conv = {
728                 .conv = null_conv,
729                 .appdata_ptr = NULL
730         };
731
732         pam_handle_t *handle = NULL;
733         sigset_t ss, old_ss;
734         int pam_code = PAM_SUCCESS;
735         int err;
736         char **e = NULL;
737         bool close_session = false;
738         pid_t pam_pid = 0, parent_pid;
739         int flags = 0;
740
741         assert(name);
742         assert(user);
743         assert(pam_env);
744
745         /* We set up PAM in the parent process, then fork. The child
746          * will then stay around until killed via PR_GET_PDEATHSIG or
747          * systemd via the cgroup logic. It will then remove the PAM
748          * session again. The parent process will exec() the actual
749          * daemon. We do things this way to ensure that the main PID
750          * of the daemon is the one we initially fork()ed. */
751
752         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
753                 flags |= PAM_SILENT;
754
755         pam_code = pam_start(name, user, &conv, &handle);
756         if (pam_code != PAM_SUCCESS) {
757                 handle = NULL;
758                 goto fail;
759         }
760
761         if (tty) {
762                 pam_code = pam_set_item(handle, PAM_TTY, tty);
763                 if (pam_code != PAM_SUCCESS)
764                         goto fail;
765         }
766
767         pam_code = pam_acct_mgmt(handle, flags);
768         if (pam_code != PAM_SUCCESS)
769                 goto fail;
770
771         pam_code = pam_open_session(handle, flags);
772         if (pam_code != PAM_SUCCESS)
773                 goto fail;
774
775         close_session = true;
776
777         e = pam_getenvlist(handle);
778         if (!e) {
779                 pam_code = PAM_BUF_ERR;
780                 goto fail;
781         }
782
783         /* Block SIGTERM, so that we know that it won't get lost in
784          * the child */
785         if (sigemptyset(&ss) < 0 ||
786             sigaddset(&ss, SIGTERM) < 0 ||
787             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
788                 goto fail;
789
790         parent_pid = getpid();
791
792         pam_pid = fork();
793         if (pam_pid < 0)
794                 goto fail;
795
796         if (pam_pid == 0) {
797                 int sig;
798                 int r = EXIT_PAM;
799
800                 /* The child's job is to reset the PAM session on
801                  * termination */
802
803                 /* This string must fit in 10 chars (i.e. the length
804                  * of "/sbin/init"), to look pretty in /bin/ps */
805                 rename_process("(sd-pam)");
806
807                 /* Make sure we don't keep open the passed fds in this
808                 child. We assume that otherwise only those fds are
809                 open here that have been opened by PAM. */
810                 close_many(fds, n_fds);
811
812                 /* Drop privileges - we don't need any to pam_close_session
813                  * and this will make PR_SET_PDEATHSIG work in most cases.
814                  * If this fails, ignore the error - but expect sd-pam threads
815                  * to fail to exit normally */
816                 if (setresuid(uid, uid, uid) < 0)
817                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
818
819                 /* Wait until our parent died. This will only work if
820                  * the above setresuid() succeeds, otherwise the kernel
821                  * will not allow unprivileged parents kill their privileged
822                  * children this way. We rely on the control groups kill logic
823                  * to do the rest for us. */
824                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
825                         goto child_finish;
826
827                 /* Check if our parent process might already have
828                  * died? */
829                 if (getppid() == parent_pid) {
830                         for (;;) {
831                                 if (sigwait(&ss, &sig) < 0) {
832                                         if (errno == EINTR)
833                                                 continue;
834
835                                         goto child_finish;
836                                 }
837
838                                 assert(sig == SIGTERM);
839                                 break;
840                         }
841                 }
842
843                 /* If our parent died we'll end the session */
844                 if (getppid() != parent_pid) {
845                         pam_code = pam_close_session(handle, flags);
846                         if (pam_code != PAM_SUCCESS)
847                                 goto child_finish;
848                 }
849
850                 r = 0;
851
852         child_finish:
853                 pam_end(handle, pam_code | flags);
854                 _exit(r);
855         }
856
857         /* If the child was forked off successfully it will do all the
858          * cleanups, so forget about the handle here. */
859         handle = NULL;
860
861         /* Unblock SIGTERM again in the parent */
862         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
863                 goto fail;
864
865         /* We close the log explicitly here, since the PAM modules
866          * might have opened it, but we don't want this fd around. */
867         closelog();
868
869         *pam_env = e;
870         e = NULL;
871
872         return 0;
873
874 fail:
875         if (pam_code != PAM_SUCCESS) {
876                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
877                 err = -EPERM;  /* PAM errors do not map to errno */
878         } else {
879                 log_error("PAM failed: %m");
880                 err = -errno;
881         }
882
883         if (handle) {
884                 if (close_session)
885                         pam_code = pam_close_session(handle, flags);
886
887                 pam_end(handle, pam_code | flags);
888         }
889
890         strv_free(e);
891
892         closelog();
893
894         if (pam_pid > 1) {
895                 kill(pam_pid, SIGTERM);
896                 kill(pam_pid, SIGCONT);
897         }
898
899         return err;
900 }
901 #endif
902
/* Renames the calling process to the file name part of path, wrapped
 * in parentheses. Only the last 8 characters of the file name are
 * used, so that the result never exceeds 10 characters (the length of
 * "/sbin/init"), which looks pretty in /bin/ps. An empty file name
 * results in "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        size_t len;

        base = path_get_file_name(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the beginning is often just a common
                 * prefix such as "systemd-" */
                base += len - 8;
                len = 8;
        }

        /* '(' + up to 8 chars + ')' + NUL fits exactly into 11 bytes */
        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
933
934 static int apply_seccomp(uint32_t *syscall_filter) {
935         static const struct sock_filter header[] = {
936                 VALIDATE_ARCHITECTURE,
937                 EXAMINE_SYSCALL
938         };
939         static const struct sock_filter footer[] = {
940                 _KILL_PROCESS
941         };
942
943         int i;
944         unsigned n;
945         struct sock_filter *f;
946         struct sock_fprog prog = {};
947
948         assert(syscall_filter);
949
950         /* First: count the syscalls to check for */
951         for (i = 0, n = 0; i < syscall_max(); i++)
952                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
953                         n++;
954
955         /* Second: build the filter program from a header the syscall
956          * matches and the footer */
957         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
958         memcpy(f, header, sizeof(header));
959
960         for (i = 0, n = 0; i < syscall_max(); i++)
961                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
962                         struct sock_filter item[] = {
963                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
964                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
965                         };
966
967                         assert_cc(ELEMENTSOF(item) == 2);
968
969                         f[ELEMENTSOF(header) + 2*n]  = item[0];
970                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
971
972                         n++;
973                 }
974
975         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
976
977         /* Third: install the filter */
978         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
979         prog.filter = f;
980         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
981                 return -errno;
982
983         return 0;
984 }
985
986 static void do_idle_pipe_dance(int idle_pipe[4]) {
987         assert(idle_pipe);
988
989         if (idle_pipe[1] >= 0)
990                 close_nointr_nofail(idle_pipe[1]);
991         if (idle_pipe[2] >= 0)
992                 close_nointr_nofail(idle_pipe[2]);
993
994         if (idle_pipe[0] >= 0) {
995                 int r;
996
997                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
998
999                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1000                         /* Signal systemd that we are bored and want to continue. */
1001                         write(idle_pipe[3], "x", 1);
1002
1003                         /* Wait for systemd to react to the signal above. */
1004                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1005                 }
1006
1007                 close_nointr_nofail(idle_pipe[0]);
1008
1009         }
1010
1011         if (idle_pipe[3] >= 0)
1012                 close_nointr_nofail(idle_pipe[3]);
1013 }
1014
1015 int exec_spawn(ExecCommand *command,
1016                char **argv,
1017                ExecContext *context,
1018                int fds[], unsigned n_fds,
1019                char **environment,
1020                bool apply_permissions,
1021                bool apply_chroot,
1022                bool apply_tty_stdin,
1023                bool confirm_spawn,
1024                CGroupControllerMask cgroup_supported,
1025                const char *cgroup_path,
1026                const char *unit_id,
1027                int idle_pipe[4],
1028                ExecRuntime *runtime,
1029                pid_t *ret) {
1030
1031         _cleanup_strv_free_ char **files_env = NULL;
1032         int socket_fd;
1033         char *line;
1034         pid_t pid;
1035         int r;
1036
1037         assert(command);
1038         assert(context);
1039         assert(ret);
1040         assert(fds || n_fds <= 0);
1041
1042         if (context->std_input == EXEC_INPUT_SOCKET ||
1043             context->std_output == EXEC_OUTPUT_SOCKET ||
1044             context->std_error == EXEC_OUTPUT_SOCKET) {
1045
1046                 if (n_fds != 1)
1047                         return -EINVAL;
1048
1049                 socket_fd = fds[0];
1050
1051                 fds = NULL;
1052                 n_fds = 0;
1053         } else
1054                 socket_fd = -1;
1055
1056         r = exec_context_load_environment(context, &files_env);
1057         if (r < 0) {
1058                 log_struct_unit(LOG_ERR,
1059                            unit_id,
1060                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1061                            "ERRNO=%d", -r,
1062                            NULL);
1063                 return r;
1064         }
1065
1066         if (!argv)
1067                 argv = command->argv;
1068
1069         line = exec_command_line(argv);
1070         if (!line)
1071                 return log_oom();
1072
1073         log_struct_unit(LOG_DEBUG,
1074                         unit_id,
1075                         "EXECUTABLE=%s", command->path,
1076                         "MESSAGE=About to execute: %s", line,
1077                         NULL);
1078         free(line);
1079
1080         pid = fork();
1081         if (pid < 0)
1082                 return -errno;
1083
1084         if (pid == 0) {
1085                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1086                 const char *username = NULL, *home = NULL, *shell = NULL;
1087                 unsigned n_dont_close = 0, n_env = 0;
1088                 int dont_close[n_fds + 3];
1089                 uid_t uid = (uid_t) -1;
1090                 gid_t gid = (gid_t) -1;
1091                 sigset_t ss;
1092                 int i, err;
1093
1094                 /* child */
1095
1096                 rename_process_from_path(command->path);
1097
1098                 /* We reset exactly these signals, since they are the
1099                  * only ones we set to SIG_IGN in the main daemon. All
1100                  * others we leave untouched because we set them to
1101                  * SIG_DFL or a valid handler initially, both of which
1102                  * will be demoted to SIG_DFL. */
1103                 default_signals(SIGNALS_CRASH_HANDLER,
1104                                 SIGNALS_IGNORE, -1);
1105
1106                 if (context->ignore_sigpipe)
1107                         ignore_signals(SIGPIPE, -1);
1108
1109                 assert_se(sigemptyset(&ss) == 0);
1110                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1111                         err = -errno;
1112                         r = EXIT_SIGNAL_MASK;
1113                         goto fail_child;
1114                 }
1115
1116                 if (idle_pipe)
1117                         do_idle_pipe_dance(idle_pipe);
1118
1119                 /* Close sockets very early to make sure we don't
1120                  * block init reexecution because it cannot bind its
1121                  * sockets */
1122                 log_forget_fds();
1123
1124                 if (socket_fd >= 0)
1125                         dont_close[n_dont_close++] = socket_fd;
1126                 if (n_fds > 0) {
1127                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1128                         n_dont_close += n_fds;
1129                 }
1130                 if (runtime) {
1131                         if (runtime->netns_storage_socket[0] >= 0)
1132                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1133                         if (runtime->netns_storage_socket[1] >= 0)
1134                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1135                 }
1136
1137                 err = close_all_fds(dont_close, n_dont_close);
1138                 if (err < 0) {
1139                         r = EXIT_FDS;
1140                         goto fail_child;
1141                 }
1142
1143                 if (!context->same_pgrp)
1144                         if (setsid() < 0) {
1145                                 err = -errno;
1146                                 r = EXIT_SETSID;
1147                                 goto fail_child;
1148                         }
1149
1150                 if (context->tcpwrap_name) {
1151                         if (socket_fd >= 0)
1152                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1153                                         err = -EACCES;
1154                                         r = EXIT_TCPWRAP;
1155                                         goto fail_child;
1156                                 }
1157
1158                         for (i = 0; i < (int) n_fds; i++) {
1159                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1160                                         err = -EACCES;
1161                                         r = EXIT_TCPWRAP;
1162                                         goto fail_child;
1163                                 }
1164                         }
1165                 }
1166
1167                 exec_context_tty_reset(context);
1168
1169                 if (confirm_spawn) {
1170                         char response;
1171
1172                         err = ask_for_confirmation(&response, argv);
1173                         if (err == -ETIMEDOUT)
1174                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1175                         else if (err < 0)
1176                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1177                         else if (response == 's') {
1178                                 write_confirm_message("Skipping execution.\n");
1179                                 err = -ECANCELED;
1180                                 r = EXIT_CONFIRM;
1181                                 goto fail_child;
1182                         } else if (response == 'n') {
1183                                 write_confirm_message("Failing execution.\n");
1184                                 err = r = 0;
1185                                 goto fail_child;
1186                         }
1187                 }
1188
1189                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1190                  * must sure to drop O_NONBLOCK */
1191                 if (socket_fd >= 0)
1192                         fd_nonblock(socket_fd, false);
1193
1194                 err = setup_input(context, socket_fd, apply_tty_stdin);
1195                 if (err < 0) {
1196                         r = EXIT_STDIN;
1197                         goto fail_child;
1198                 }
1199
1200                 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1201                 if (err < 0) {
1202                         r = EXIT_STDOUT;
1203                         goto fail_child;
1204                 }
1205
1206                 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1207                 if (err < 0) {
1208                         r = EXIT_STDERR;
1209                         goto fail_child;
1210                 }
1211
1212                 if (cgroup_path) {
1213                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1214                         if (err < 0) {
1215                                 r = EXIT_CGROUP;
1216                                 goto fail_child;
1217                         }
1218                 }
1219
1220                 if (context->oom_score_adjust_set) {
1221                         char t[16];
1222
1223                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1224                         char_array_0(t);
1225
1226                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1227                                 err = -errno;
1228                                 r = EXIT_OOM_ADJUST;
1229                                 goto fail_child;
1230                         }
1231                 }
1232
1233                 if (context->nice_set)
1234                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1235                                 err = -errno;
1236                                 r = EXIT_NICE;
1237                                 goto fail_child;
1238                         }
1239
1240                 if (context->cpu_sched_set) {
1241                         struct sched_param param = {
1242                                 .sched_priority = context->cpu_sched_priority,
1243                         };
1244
1245                         r = sched_setscheduler(0,
1246                                                context->cpu_sched_policy |
1247                                                (context->cpu_sched_reset_on_fork ?
1248                                                 SCHED_RESET_ON_FORK : 0),
1249                                                &param);
1250                         if (r < 0) {
1251                                 err = -errno;
1252                                 r = EXIT_SETSCHEDULER;
1253                                 goto fail_child;
1254                         }
1255                 }
1256
1257                 if (context->cpuset)
1258                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1259                                 err = -errno;
1260                                 r = EXIT_CPUAFFINITY;
1261                                 goto fail_child;
1262                         }
1263
1264                 if (context->ioprio_set)
1265                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1266                                 err = -errno;
1267                                 r = EXIT_IOPRIO;
1268                                 goto fail_child;
1269                         }
1270
1271                 if (context->timer_slack_nsec != (nsec_t) -1)
1272                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1273                                 err = -errno;
1274                                 r = EXIT_TIMERSLACK;
1275                                 goto fail_child;
1276                         }
1277
1278                 if (context->utmp_id)
1279                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1280
1281                 if (context->user) {
1282                         username = context->user;
1283                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1284                         if (err < 0) {
1285                                 r = EXIT_USER;
1286                                 goto fail_child;
1287                         }
1288
1289                         if (is_terminal_input(context->std_input)) {
1290                                 err = chown_terminal(STDIN_FILENO, uid);
1291                                 if (err < 0) {
1292                                         r = EXIT_STDIN;
1293                                         goto fail_child;
1294                                 }
1295                         }
1296                 }
1297
1298 #ifdef HAVE_PAM
1299                 if (cgroup_path && context->user && context->pam_name) {
1300                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1301                         if (err < 0) {
1302                                 r = EXIT_CGROUP;
1303                                 goto fail_child;
1304                         }
1305
1306
1307                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1308                         if (err < 0) {
1309                                 r = EXIT_CGROUP;
1310                                 goto fail_child;
1311                         }
1312                 }
1313 #endif
1314
1315                 if (apply_permissions) {
1316                         err = enforce_groups(context, username, gid);
1317                         if (err < 0) {
1318                                 r = EXIT_GROUP;
1319                                 goto fail_child;
1320                         }
1321                 }
1322
1323                 umask(context->umask);
1324
1325 #ifdef HAVE_PAM
1326                 if (apply_permissions && context->pam_name && username) {
1327                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1328                         if (err < 0) {
1329                                 r = EXIT_PAM;
1330                                 goto fail_child;
1331                         }
1332                 }
1333 #endif
1334                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1335                         err = setup_netns(runtime->netns_storage_socket);
1336                         if (err < 0) {
1337                                 r = EXIT_NETWORK;
1338                                 goto fail_child;
1339                         }
1340                 }
1341
1342                 if (!strv_isempty(context->read_write_dirs) ||
1343                     !strv_isempty(context->read_only_dirs) ||
1344                     !strv_isempty(context->inaccessible_dirs) ||
1345                     context->mount_flags != 0 ||
1346                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) {
1347
1348                         char *tmp = NULL, *var = NULL;
1349
1350                         /* The runtime struct only contains the parent
1351                          * of the private /tmp, which is
1352                          * non-accessible to world users. Inside of it
1353                          * there's a /tmp that is sticky, and that's
1354                          * the one we want to use here. */
1355
1356                         if (context->private_tmp && runtime) {
1357                                 if (runtime->tmp_dir)
1358                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1359                                 if (runtime->var_tmp_dir)
1360                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1361                         }
1362
1363                         err = setup_namespace(
1364                                         context->read_write_dirs,
1365                                         context->read_only_dirs,
1366                                         context->inaccessible_dirs,
1367                                         tmp,
1368                                         var,
1369                                         context->mount_flags);
1370
1371                         if (err < 0) {
1372                                 r = EXIT_NAMESPACE;
1373                                 goto fail_child;
1374                         }
1375                 }
1376
1377                 if (apply_chroot) {
1378                         if (context->root_directory)
1379                                 if (chroot(context->root_directory) < 0) {
1380                                         err = -errno;
1381                                         r = EXIT_CHROOT;
1382                                         goto fail_child;
1383                                 }
1384
1385                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1386                                 err = -errno;
1387                                 r = EXIT_CHDIR;
1388                                 goto fail_child;
1389                         }
1390                 } else {
1391                         _cleanup_free_ char *d = NULL;
1392
1393                         if (asprintf(&d, "%s/%s",
1394                                      context->root_directory ? context->root_directory : "",
1395                                      context->working_directory ? context->working_directory : "") < 0) {
1396                                 err = -ENOMEM;
1397                                 r = EXIT_MEMORY;
1398                                 goto fail_child;
1399                         }
1400
1401                         if (chdir(d) < 0) {
1402                                 err = -errno;
1403                                 r = EXIT_CHDIR;
1404                                 goto fail_child;
1405                         }
1406                 }
1407
1408                 /* We repeat the fd closing here, to make sure that
1409                  * nothing is leaked from the PAM modules */
1410                 err = close_all_fds(fds, n_fds);
1411                 if (err >= 0)
1412                         err = shift_fds(fds, n_fds);
1413                 if (err >= 0)
1414                         err = flags_fds(fds, n_fds, context->non_blocking);
1415                 if (err < 0) {
1416                         r = EXIT_FDS;
1417                         goto fail_child;
1418                 }
1419
1420                 if (apply_permissions) {
1421
1422                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1423                                 if (!context->rlimit[i])
1424                                         continue;
1425
1426                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1427                                         err = -errno;
1428                                         r = EXIT_LIMITS;
1429                                         goto fail_child;
1430                                 }
1431                         }
1432
1433                         if (context->capability_bounding_set_drop) {
1434                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1435                                 if (err < 0) {
1436                                         r = EXIT_CAPABILITIES;
1437                                         goto fail_child;
1438                                 }
1439                         }
1440
1441                         if (context->user) {
1442                                 err = enforce_user(context, uid);
1443                                 if (err < 0) {
1444                                         r = EXIT_USER;
1445                                         goto fail_child;
1446                                 }
1447                         }
1448
1449                         /* PR_GET_SECUREBITS is not privileged, while
1450                          * PR_SET_SECUREBITS is. So to suppress
1451                          * potential EPERMs we'll try not to call
1452                          * PR_SET_SECUREBITS unless necessary. */
1453                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1454                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1455                                         err = -errno;
1456                                         r = EXIT_SECUREBITS;
1457                                         goto fail_child;
1458                                 }
1459
1460                         if (context->capabilities)
1461                                 if (cap_set_proc(context->capabilities) < 0) {
1462                                         err = -errno;
1463                                         r = EXIT_CAPABILITIES;
1464                                         goto fail_child;
1465                                 }
1466
1467                         if (context->no_new_privileges)
1468                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1469                                         err = -errno;
1470                                         r = EXIT_NO_NEW_PRIVILEGES;
1471                                         goto fail_child;
1472                                 }
1473
1474                         if (context->syscall_filter) {
1475                                 err = apply_seccomp(context->syscall_filter);
1476                                 if (err < 0) {
1477                                         r = EXIT_SECCOMP;
1478                                         goto fail_child;
1479                                 }
1480                         }
1481                 }
1482
1483                 our_env = new(char*, 8);
1484                 if (!our_env ||
1485                     (n_fds > 0 && (
1486                             asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1487                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0)) ||
1488                     (home && asprintf(our_env + n_env++, "HOME=%s", home) < 0) ||
1489                     (username && (
1490                             asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1491                             asprintf(our_env + n_env++, "USER=%s", username) < 0)) ||
1492                     (shell && asprintf(our_env + n_env++, "SHELL=%s", shell) < 0) ||
1493                     ((is_terminal_input(context->std_input) ||
1494                       context->std_output == EXEC_OUTPUT_TTY ||
1495                       context->std_error == EXEC_OUTPUT_TTY) && (
1496                               !(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))))) {
1497
1498                         err = -ENOMEM;
1499                         r = EXIT_MEMORY;
1500                         goto fail_child;
1501                 }
1502
1503                 our_env[n_env++] = NULL;
1504                 assert(n_env <= 8);
1505
1506                 final_env = strv_env_merge(5,
1507                                            environment,
1508                                            our_env,
1509                                            context->environment,
1510                                            files_env,
1511                                            pam_env,
1512                                            NULL);
1513                 if (!final_env) {
1514                         err = -ENOMEM;
1515                         r = EXIT_MEMORY;
1516                         goto fail_child;
1517                 }
1518
1519                 final_argv = replace_env_argv(argv, final_env);
1520                 if (!final_argv) {
1521                         err = -ENOMEM;
1522                         r = EXIT_MEMORY;
1523                         goto fail_child;
1524                 }
1525
1526                 final_env = strv_env_clean(final_env);
1527
1528                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1529                         line = exec_command_line(final_argv);
1530                         if (line) {
1531                                 log_open();
1532                                 log_struct_unit(LOG_DEBUG,
1533                                                 unit_id,
1534                                                 "EXECUTABLE=%s", command->path,
1535                                                 "MESSAGE=Executing: %s", line,
1536                                                 NULL);
1537                                 log_close();
1538                                 free(line);
1539                                 line = NULL;
1540                         }
1541                 }
1542                 execve(command->path, final_argv, final_env);
1543                 err = -errno;
1544                 r = EXIT_EXEC;
1545
1546         fail_child:
1547                 if (r != 0) {
1548                         log_open();
1549                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1550                                    "EXECUTABLE=%s", command->path,
1551                                    "MESSAGE=Failed at step %s spawning %s: %s",
1552                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1553                                           command->path, strerror(-err),
1554                                    "ERRNO=%d", -err,
1555                                    NULL);
1556                         log_close();
1557                 }
1558
1559                 _exit(r);
1560         }
1561
1562         log_struct_unit(LOG_DEBUG,
1563                         unit_id,
1564                         "MESSAGE=Forked %s as %lu",
1565                         command->path, (unsigned long) pid,
1566                         NULL);
1567
1568         /* We add the new process to the cgroup both in the child (so
1569          * that we can be sure that no user code is ever executed
1570          * outside of the cgroup) and in the parent (so that we can be
1571          * sure that when we kill the cgroup the process will be
1572          * killed too). */
1573         if (cgroup_path)
1574                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1575
1576         exec_status_start(&command->exec_status, pid);
1577
1578         *ret = pid;
1579         return 0;
1580 }
1581
1582 void exec_context_init(ExecContext *c) {
1583         assert(c);
1584
1585         c->umask = 0022;
1586         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1587         c->cpu_sched_policy = SCHED_OTHER;
1588         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1589         c->syslog_level_prefix = true;
1590         c->ignore_sigpipe = true;
1591         c->timer_slack_nsec = (nsec_t) -1;
1592 }
1593
1594 void exec_context_done(ExecContext *c) {
1595         unsigned l;
1596
1597         assert(c);
1598
1599         strv_free(c->environment);
1600         c->environment = NULL;
1601
1602         strv_free(c->environment_files);
1603         c->environment_files = NULL;
1604
1605         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1606                 free(c->rlimit[l]);
1607                 c->rlimit[l] = NULL;
1608         }
1609
1610         free(c->working_directory);
1611         c->working_directory = NULL;
1612         free(c->root_directory);
1613         c->root_directory = NULL;
1614
1615         free(c->tty_path);
1616         c->tty_path = NULL;
1617
1618         free(c->tcpwrap_name);
1619         c->tcpwrap_name = NULL;
1620
1621         free(c->syslog_identifier);
1622         c->syslog_identifier = NULL;
1623
1624         free(c->user);
1625         c->user = NULL;
1626
1627         free(c->group);
1628         c->group = NULL;
1629
1630         strv_free(c->supplementary_groups);
1631         c->supplementary_groups = NULL;
1632
1633         free(c->pam_name);
1634         c->pam_name = NULL;
1635
1636         if (c->capabilities) {
1637                 cap_free(c->capabilities);
1638                 c->capabilities = NULL;
1639         }
1640
1641         strv_free(c->read_only_dirs);
1642         c->read_only_dirs = NULL;
1643
1644         strv_free(c->read_write_dirs);
1645         c->read_write_dirs = NULL;
1646
1647         strv_free(c->inaccessible_dirs);
1648         c->inaccessible_dirs = NULL;
1649
1650         if (c->cpuset)
1651                 CPU_FREE(c->cpuset);
1652
1653         free(c->utmp_id);
1654         c->utmp_id = NULL;
1655
1656         free(c->syscall_filter);
1657         c->syscall_filter = NULL;
1658 }
1659
1660 void exec_command_done(ExecCommand *c) {
1661         assert(c);
1662
1663         free(c->path);
1664         c->path = NULL;
1665
1666         strv_free(c->argv);
1667         c->argv = NULL;
1668 }
1669
1670 void exec_command_done_array(ExecCommand *c, unsigned n) {
1671         unsigned i;
1672
1673         for (i = 0; i < n; i++)
1674                 exec_command_done(c+i);
1675 }
1676
1677 void exec_command_free_list(ExecCommand *c) {
1678         ExecCommand *i;
1679
1680         while ((i = c)) {
1681                 LIST_REMOVE(command, c, i);
1682                 exec_command_done(i);
1683                 free(i);
1684         }
1685 }
1686
1687 void exec_command_free_array(ExecCommand **c, unsigned n) {
1688         unsigned i;
1689
1690         for (i = 0; i < n; i++) {
1691                 exec_command_free_list(c[i]);
1692                 c[i] = NULL;
1693         }
1694 }
1695
1696 int exec_context_load_environment(const ExecContext *c, char ***l) {
1697         char **i, **r = NULL;
1698
1699         assert(c);
1700         assert(l);
1701
1702         STRV_FOREACH(i, c->environment_files) {
1703                 char *fn;
1704                 int k;
1705                 bool ignore = false;
1706                 char **p;
1707                 _cleanup_globfree_ glob_t pglob = {};
1708                 int count, n;
1709
1710                 fn = *i;
1711
1712                 if (fn[0] == '-') {
1713                         ignore = true;
1714                         fn ++;
1715                 }
1716
1717                 if (!path_is_absolute(fn)) {
1718                         if (ignore)
1719                                 continue;
1720
1721                         strv_free(r);
1722                         return -EINVAL;
1723                 }
1724
1725                 /* Filename supports globbing, take all matching files */
1726                 errno = 0;
1727                 if (glob(fn, 0, NULL, &pglob) != 0) {
1728                         if (ignore)
1729                                 continue;
1730
1731                         strv_free(r);
1732                         return errno ? -errno : -EINVAL;
1733                 }
1734                 count = pglob.gl_pathc;
1735                 if (count == 0) {
1736                         if (ignore)
1737                                 continue;
1738
1739                         strv_free(r);
1740                         return -EINVAL;
1741                 }
1742                 for (n = 0; n < count; n++) {
1743                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1744                         if (k < 0) {
1745                                 if (ignore)
1746                                         continue;
1747
1748                                 strv_free(r);
1749                                 return k;
1750                         }
1751                         /* Log invalid environment variables with filename */
1752                         if (p)
1753                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1754
1755                         if (r == NULL)
1756                                 r = p;
1757                         else {
1758                                 char **m;
1759
1760                                 m = strv_env_merge(2, r, p);
1761                                 strv_free(r);
1762                                 strv_free(p);
1763                                 if (!m)
1764                                         return -ENOMEM;
1765
1766                                 r = m;
1767                         }
1768                 }
1769         }
1770
1771         *l = r;
1772
1773         return 0;
1774 }
1775
/* Returns true if the given tty (with or without a "/dev/" prefix) may
 * refer to the same device as /dev/console. If the console cannot be
 * resolved, the answer is conservatively true. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool same;

        /* Strip the device directory prefix, if any */
        if (startswith(tty, "/dev/"))
                tty += sizeof("/dev/") - 1;

        /* trivial identity? */
        if (streq(tty, "console"))
                return true;

        /* if we could not resolve, assume it may */
        console = resolve_dev_console(&active);
        if (!console)
                return true;

        if (streq(console, tty))
                same = true;
        else
                /* "tty0" means the active VC, so it may be the same sometimes */
                same = streq(console, "tty0") && tty_is_vc(tty);

        free(active);

        return same;
}
1798
1799 bool exec_context_may_touch_console(ExecContext *ec) {
1800         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1801                 is_terminal_input(ec->std_input) ||
1802                 is_terminal_output(ec->std_output) ||
1803                 is_terminal_output(ec->std_error)) &&
1804                tty_may_match_dev_console(tty_path(ec));
1805 }
1806
1807 static void strv_fprintf(FILE *f, char **l) {
1808         char **g;
1809
1810         assert(f);
1811
1812         STRV_FOREACH(g, l)
1813                 fprintf(f, " %s", *g);
1814 }
1815
1816 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1817         char **e;
1818         unsigned i;
1819
1820         assert(c);
1821         assert(f);
1822
1823         prefix = strempty(prefix);
1824
1825         fprintf(f,
1826                 "%sUMask: %04o\n"
1827                 "%sWorkingDirectory: %s\n"
1828                 "%sRootDirectory: %s\n"
1829                 "%sNonBlocking: %s\n"
1830                 "%sPrivateTmp: %s\n"
1831                 "%sPrivateNetwork: %s\n"
1832                 "%sIgnoreSIGPIPE: %s\n",
1833                 prefix, c->umask,
1834                 prefix, c->working_directory ? c->working_directory : "/",
1835                 prefix, c->root_directory ? c->root_directory : "/",
1836                 prefix, yes_no(c->non_blocking),
1837                 prefix, yes_no(c->private_tmp),
1838                 prefix, yes_no(c->private_network),
1839                 prefix, yes_no(c->ignore_sigpipe));
1840
1841         STRV_FOREACH(e, c->environment)
1842                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1843
1844         STRV_FOREACH(e, c->environment_files)
1845                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1846
1847         if (c->tcpwrap_name)
1848                 fprintf(f,
1849                         "%sTCPWrapName: %s\n",
1850                         prefix, c->tcpwrap_name);
1851
1852         if (c->nice_set)
1853                 fprintf(f,
1854                         "%sNice: %i\n",
1855                         prefix, c->nice);
1856
1857         if (c->oom_score_adjust_set)
1858                 fprintf(f,
1859                         "%sOOMScoreAdjust: %i\n",
1860                         prefix, c->oom_score_adjust);
1861
1862         for (i = 0; i < RLIM_NLIMITS; i++)
1863                 if (c->rlimit[i])
1864                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1865
1866         if (c->ioprio_set) {
1867                 char *class_str;
1868                 int r;
1869
1870                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1871                 if (r < 0)
1872                         class_str = NULL;
1873                 fprintf(f,
1874                         "%sIOSchedulingClass: %s\n"
1875                         "%sIOPriority: %i\n",
1876                         prefix, strna(class_str),
1877                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1878                 free(class_str);
1879         }
1880
1881         if (c->cpu_sched_set) {
1882                 char *policy_str;
1883                 int r;
1884
1885                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1886                 if (r < 0)
1887                         policy_str = NULL;
1888                 fprintf(f,
1889                         "%sCPUSchedulingPolicy: %s\n"
1890                         "%sCPUSchedulingPriority: %i\n"
1891                         "%sCPUSchedulingResetOnFork: %s\n",
1892                         prefix, strna(policy_str),
1893                         prefix, c->cpu_sched_priority,
1894                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1895                 free(policy_str);
1896         }
1897
1898         if (c->cpuset) {
1899                 fprintf(f, "%sCPUAffinity:", prefix);
1900                 for (i = 0; i < c->cpuset_ncpus; i++)
1901                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1902                                 fprintf(f, " %i", i);
1903                 fputs("\n", f);
1904         }
1905
1906         if (c->timer_slack_nsec != (nsec_t) -1)
1907                 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1908
1909         fprintf(f,
1910                 "%sStandardInput: %s\n"
1911                 "%sStandardOutput: %s\n"
1912                 "%sStandardError: %s\n",
1913                 prefix, exec_input_to_string(c->std_input),
1914                 prefix, exec_output_to_string(c->std_output),
1915                 prefix, exec_output_to_string(c->std_error));
1916
1917         if (c->tty_path)
1918                 fprintf(f,
1919                         "%sTTYPath: %s\n"
1920                         "%sTTYReset: %s\n"
1921                         "%sTTYVHangup: %s\n"
1922                         "%sTTYVTDisallocate: %s\n",
1923                         prefix, c->tty_path,
1924                         prefix, yes_no(c->tty_reset),
1925                         prefix, yes_no(c->tty_vhangup),
1926                         prefix, yes_no(c->tty_vt_disallocate));
1927
1928         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1929             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1930             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1931             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1932                 char *fac_str, *lvl_str;
1933                 int r;
1934
1935                 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1936                 if (r < 0)
1937                         fac_str = NULL;
1938
1939                 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1940                 if (r < 0)
1941                         lvl_str = NULL;
1942
1943                 fprintf(f,
1944                         "%sSyslogFacility: %s\n"
1945                         "%sSyslogLevel: %s\n",
1946                         prefix, strna(fac_str),
1947                         prefix, strna(lvl_str));
1948                 free(lvl_str);
1949                 free(fac_str);
1950         }
1951
1952         if (c->capabilities) {
1953                 char *t;
1954                 if ((t = cap_to_text(c->capabilities, NULL))) {
1955                         fprintf(f, "%sCapabilities: %s\n",
1956                                 prefix, t);
1957                         cap_free(t);
1958                 }
1959         }
1960
1961         if (c->secure_bits)
1962                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1963                         prefix,
1964                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
1965                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1966                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1967                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1968                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
1969                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1970
1971         if (c->capability_bounding_set_drop) {
1972                 unsigned long l;
1973                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1974
1975                 for (l = 0; l <= cap_last_cap(); l++)
1976                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1977                                 char *t;
1978
1979                                 if ((t = cap_to_name(l))) {
1980                                         fprintf(f, " %s", t);
1981                                         cap_free(t);
1982                                 }
1983                         }
1984
1985                 fputs("\n", f);
1986         }
1987
1988         if (c->user)
1989                 fprintf(f, "%sUser: %s\n", prefix, c->user);
1990         if (c->group)
1991                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1992
1993         if (strv_length(c->supplementary_groups) > 0) {
1994                 fprintf(f, "%sSupplementaryGroups:", prefix);
1995                 strv_fprintf(f, c->supplementary_groups);
1996                 fputs("\n", f);
1997         }
1998
1999         if (c->pam_name)
2000                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2001
2002         if (strv_length(c->read_write_dirs) > 0) {
2003                 fprintf(f, "%sReadWriteDirs:", prefix);
2004                 strv_fprintf(f, c->read_write_dirs);
2005                 fputs("\n", f);
2006         }
2007
2008         if (strv_length(c->read_only_dirs) > 0) {
2009                 fprintf(f, "%sReadOnlyDirs:", prefix);
2010                 strv_fprintf(f, c->read_only_dirs);
2011                 fputs("\n", f);
2012         }
2013
2014         if (strv_length(c->inaccessible_dirs) > 0) {
2015                 fprintf(f, "%sInaccessibleDirs:", prefix);
2016                 strv_fprintf(f, c->inaccessible_dirs);
2017                 fputs("\n", f);
2018         }
2019
2020         if (c->utmp_id)
2021                 fprintf(f,
2022                         "%sUtmpIdentifier: %s\n",
2023                         prefix, c->utmp_id);
2024 }
2025
2026 void exec_status_start(ExecStatus *s, pid_t pid) {
2027         assert(s);
2028
2029         zero(*s);
2030         s->pid = pid;
2031         dual_timestamp_get(&s->start_timestamp);
2032 }
2033
2034 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2035         assert(s);
2036
2037         if (s->pid && s->pid != pid)
2038                 zero(*s);
2039
2040         s->pid = pid;
2041         dual_timestamp_get(&s->exit_timestamp);
2042
2043         s->code = code;
2044         s->status = status;
2045
2046         if (context) {
2047                 if (context->utmp_id)
2048                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2049
2050                 exec_context_tty_reset(context);
2051         }
2052 }
2053
2054 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2055         char buf[FORMAT_TIMESTAMP_MAX];
2056
2057         assert(s);
2058         assert(f);
2059
2060         if (!prefix)
2061                 prefix = "";
2062
2063         if (s->pid <= 0)
2064                 return;
2065
2066         fprintf(f,
2067                 "%sPID: %lu\n",
2068                 prefix, (unsigned long) s->pid);
2069
2070         if (s->start_timestamp.realtime > 0)
2071                 fprintf(f,
2072                         "%sStart Timestamp: %s\n",
2073                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2074
2075         if (s->exit_timestamp.realtime > 0)
2076                 fprintf(f,
2077                         "%sExit Timestamp: %s\n"
2078                         "%sExit Code: %s\n"
2079                         "%sExit Status: %i\n",
2080                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2081                         prefix, sigchld_code_to_string(s->code),
2082                         prefix, s->status);
2083 }
2084
2085 char *exec_command_line(char **argv) {
2086         size_t k;
2087         char *n, *p, **a;
2088         bool first = true;
2089
2090         assert(argv);
2091
2092         k = 1;
2093         STRV_FOREACH(a, argv)
2094                 k += strlen(*a)+3;
2095
2096         if (!(n = new(char, k)))
2097                 return NULL;
2098
2099         p = n;
2100         STRV_FOREACH(a, argv) {
2101
2102                 if (!first)
2103                         *(p++) = ' ';
2104                 else
2105                         first = false;
2106
2107                 if (strpbrk(*a, WHITESPACE)) {
2108                         *(p++) = '\'';
2109                         p = stpcpy(p, *a);
2110                         *(p++) = '\'';
2111                 } else
2112                         p = stpcpy(p, *a);
2113
2114         }
2115
2116         *p = 0;
2117
2118         /* FIXME: this doesn't really handle arguments that have
2119          * spaces and ticks in them */
2120
2121         return n;
2122 }
2123
2124 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2125         char *p2;
2126         const char *prefix2;
2127
2128         char *cmd;
2129
2130         assert(c);
2131         assert(f);
2132
2133         if (!prefix)
2134                 prefix = "";
2135         p2 = strappend(prefix, "\t");
2136         prefix2 = p2 ? p2 : prefix;
2137
2138         cmd = exec_command_line(c->argv);
2139
2140         fprintf(f,
2141                 "%sCommand Line: %s\n",
2142                 prefix, cmd ? cmd : strerror(ENOMEM));
2143
2144         free(cmd);
2145
2146         exec_status_dump(&c->exec_status, f, prefix2);
2147
2148         free(p2);
2149 }
2150
2151 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2152         assert(f);
2153
2154         if (!prefix)
2155                 prefix = "";
2156
2157         LIST_FOREACH(command, c, c)
2158                 exec_command_dump(c, f, prefix);
2159 }
2160
2161 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2162         ExecCommand *end;
2163
2164         assert(l);
2165         assert(e);
2166
2167         if (*l) {
2168                 /* It's kind of important, that we keep the order here */
2169                 LIST_FIND_TAIL(command, *l, end);
2170                 LIST_INSERT_AFTER(command, *l, end, e);
2171         } else
2172               *l = e;
2173 }
2174
2175 int exec_command_set(ExecCommand *c, const char *path, ...) {
2176         va_list ap;
2177         char **l, *p;
2178
2179         assert(c);
2180         assert(path);
2181
2182         va_start(ap, path);
2183         l = strv_new_ap(path, ap);
2184         va_end(ap);
2185
2186         if (!l)
2187                 return -ENOMEM;
2188
2189         p = strdup(path);
2190         if (!p) {
2191                 strv_free(l);
2192                 return -ENOMEM;
2193         }
2194
2195         free(c->path);
2196         c->path = p;
2197
2198         strv_free(c->argv);
2199         c->argv = l;
2200
2201         return 0;
2202 }
2203
2204 static int exec_runtime_allocate(ExecRuntime **rt) {
2205
2206         if (*rt)
2207                 return 0;
2208
2209         *rt = new0(ExecRuntime, 1);
2210         if (!rt)
2211                 return -ENOMEM;
2212
2213         (*rt)->n_ref = 1;
2214         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2215
2216         return 0;
2217 }
2218
2219 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2220         int r;
2221
2222         assert(rt);
2223         assert(c);
2224         assert(id);
2225
2226         if (*rt)
2227                 return 1;
2228
2229         if (!c->private_network && !c->private_tmp)
2230                 return 0;
2231
2232         r = exec_runtime_allocate(rt);
2233         if (r < 0)
2234                 return r;
2235
2236         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2237                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2238                         return -errno;
2239         }
2240
2241         if (c->private_tmp && !(*rt)->tmp_dir) {
2242                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2243                 if (r < 0)
2244                         return r;
2245         }
2246
2247         return 1;
2248 }
2249
2250 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2251         assert(r);
2252         assert(r->n_ref > 0);
2253
2254         r->n_ref++;
2255         return r;
2256 }
2257
2258 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2259
2260         if (!r)
2261                 return NULL;
2262
2263         assert(r->n_ref > 0);
2264
2265         r->n_ref--;
2266         if (r->n_ref <= 0) {
2267                 free(r->tmp_dir);
2268                 free(r->var_tmp_dir);
2269                 close_pipe(r->netns_storage_socket);
2270                 free(r);
2271         }
2272
2273         return NULL;
2274 }
2275
2276 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2277         assert(u);
2278         assert(f);
2279         assert(fds);
2280
2281         if (!rt)
2282                 return 0;
2283
2284         if (rt->tmp_dir)
2285                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2286
2287         if (rt->var_tmp_dir)
2288                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2289
2290         if (rt->netns_storage_socket[0] >= 0) {
2291                 int copy;
2292
2293                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2294                 if (copy < 0)
2295                         return copy;
2296
2297                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2298         }
2299
2300         if (rt->netns_storage_socket[1] >= 0) {
2301                 int copy;
2302
2303                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2304                 if (copy < 0)
2305                         return copy;
2306
2307                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2308         }
2309
2310         return 0;
2311 }
2312
2313 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2314         int r;
2315
2316         assert(rt);
2317         assert(key);
2318         assert(value);
2319
2320         if (streq(key, "tmp-dir")) {
2321                 char *copy;
2322
2323                 r = exec_runtime_allocate(rt);
2324                 if (r < 0)
2325                         return r;
2326
2327                 copy = strdup(value);
2328                 if (!copy)
2329                         return log_oom();
2330
2331                 free((*rt)->tmp_dir);
2332                 (*rt)->tmp_dir = copy;
2333
2334         } else if (streq(key, "var-tmp-dir")) {
2335                 char *copy;
2336
2337                 r = exec_runtime_allocate(rt);
2338                 if (r < 0)
2339                         return r;
2340
2341                 copy = strdup(value);
2342                 if (!copy)
2343                         return log_oom();
2344
2345                 free((*rt)->var_tmp_dir);
2346                 (*rt)->var_tmp_dir = copy;
2347
2348         } else if (streq(key, "netns-socket-0")) {
2349                 int fd;
2350
2351                 r = exec_runtime_allocate(rt);
2352                 if (r < 0)
2353                         return r;
2354
2355                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2356                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2357                 else {
2358                         if ((*rt)->netns_storage_socket[0] >= 0)
2359                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2360
2361                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2362                 }
2363         } else if (streq(key, "netns-socket-1")) {
2364                 int fd;
2365
2366                 r = exec_runtime_allocate(rt);
2367                 if (r < 0)
2368                         return r;
2369
2370                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2371                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2372                 else {
2373                         if ((*rt)->netns_storage_socket[1] >= 0)
2374                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2375
2376                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2377                 }
2378         } else
2379                 return 0;
2380
2381         return 1;
2382 }
2383
/* Job function handed to asynchronous_job(): p is a heap-allocated path
 * string. The path is passed to rm_rf_dangerous() and the string is
 * freed afterwards. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2390
2391 void exec_runtime_destroy(ExecRuntime *rt) {
2392         if (!rt)
2393                 return;
2394
2395         /* If there are multiple users of this, let's leave the stuff around */
2396         if (rt->n_ref > 1)
2397                 return;
2398
2399         if (rt->tmp_dir) {
2400                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2401                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2402                 rt->tmp_dir = NULL;
2403         }
2404
2405         if (rt->var_tmp_dir) {
2406                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2407                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2408                 rt->var_tmp_dir = NULL;
2409         }
2410
2411         close_pipe(rt->netns_storage_socket);
2412 }
2413
2414 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2415         [EXEC_INPUT_NULL] = "null",
2416         [EXEC_INPUT_TTY] = "tty",
2417         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2418         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2419         [EXEC_INPUT_SOCKET] = "socket"
2420 };
2421
2422 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2423
2424 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2425         [EXEC_OUTPUT_INHERIT] = "inherit",
2426         [EXEC_OUTPUT_NULL] = "null",
2427         [EXEC_OUTPUT_TTY] = "tty",
2428         [EXEC_OUTPUT_SYSLOG] = "syslog",
2429         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2430         [EXEC_OUTPUT_KMSG] = "kmsg",
2431         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2432         [EXEC_OUTPUT_JOURNAL] = "journal",
2433         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2434         [EXEC_OUTPUT_SOCKET] = "socket"
2435 };
2436
2437 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);