chiark / gitweb /
Introduce cleanup functions for cap_free
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <linux/seccomp-bpf.h>
42 #include <glob.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #include "execute.h"
51 #include "strv.h"
52 #include "macro.h"
53 #include "capability.h"
54 #include "util.h"
55 #include "log.h"
56 #include "sd-messages.h"
57 #include "ioprio.h"
58 #include "securebits.h"
59 #include "namespace.h"
60 #include "tcpwrap.h"
61 #include "exit-status.h"
62 #include "missing.h"
63 #include "utmp-wtmp.h"
64 #include "def.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
67 #include "env-util.h"
68 #include "fileio.h"
69 #include "unit.h"
70 #include "async.h"
71
72 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
73 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
74
75 /* This assumes there is a 'tty' group */
76 #define TTY_MODE 0620
77
78 #define SNDBUF_SIZE (8*1024*1024)
79
/* Relocates the passed file descriptors so that fds[i] ends up being
 * file descriptor number i+3. The array is updated in place with the
 * new descriptor numbers.
 *
 * Returns 0 on success, or -errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        const int wanted = idx + 3;
                        int dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        /* F_DUPFD hands out the lowest free fd >= wanted */
                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The target slot was still occupied; remember
                         * the first such index so that we can make
                         * another pass starting there. */
                        if (retry_at < 0 && dup_fd != wanted)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
123
/* Applies the requested O_NONBLOCK state to every fd in the array and
 * clears FD_CLOEXEC on each of them.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these fds are meant
                 * to be handed over to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
150
151 _pure_ static const char *tty_path(const ExecContext *context) {
152         assert(context);
153
154         if (context->tty_path)
155                 return context->tty_path;
156
157         return "/dev/console";
158 }
159
160 static void exec_context_tty_reset(const ExecContext *context) {
161         assert(context);
162
163         if (context->tty_vhangup)
164                 terminal_vhangup(tty_path(context));
165
166         if (context->tty_reset)
167                 reset_terminal(tty_path(context));
168
169         if (context->tty_vt_disallocate && context->tty_path)
170                 vt_disallocate(context->tty_path);
171 }
172
173 static bool is_terminal_output(ExecOutput o) {
174         return
175                 o == EXEC_OUTPUT_TTY ||
176                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
177                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
178                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
179 }
180
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd.
 *
 * Returns nfd on success, -errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: we already got the descriptor number we wanted */
        if (null_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(null_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(null_fd);

        return result;
}
198
199 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
200         int fd, r;
201         union sockaddr_union sa = {
202                 .un.sun_family = AF_UNIX,
203                 .un.sun_path = "/run/systemd/journal/stdout",
204         };
205
206         assert(context);
207         assert(output < _EXEC_OUTPUT_MAX);
208         assert(ident);
209         assert(nfd >= 0);
210
211         fd = socket(AF_UNIX, SOCK_STREAM, 0);
212         if (fd < 0)
213                 return -errno;
214
215         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
216         if (r < 0) {
217                 close_nointr_nofail(fd);
218                 return -errno;
219         }
220
221         if (shutdown(fd, SHUT_RD) < 0) {
222                 close_nointr_nofail(fd);
223                 return -errno;
224         }
225
226         fd_inc_sndbuf(fd, SNDBUF_SIZE);
227
228         dprintf(fd,
229                 "%s\n"
230                 "%s\n"
231                 "%i\n"
232                 "%i\n"
233                 "%i\n"
234                 "%i\n"
235                 "%i\n",
236                 context->syslog_identifier ? context->syslog_identifier : ident,
237                 unit_id,
238                 context->syslog_priority,
239                 !!context->syslog_level_prefix,
240                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
241                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
242                 is_terminal_output(output));
243
244         if (fd != nfd) {
245                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
246                 close_nointr_nofail(fd);
247         } else
248                 r = nfd;
249
250         return r;
251 }
/* Opens the terminal at path via open_terminal() (O_NOCTTY is added to
 * the mode) and makes it available as file descriptor nfd.
 *
 * Returns nfd on success, the negative value from open_terminal() if
 * opening fails, or -errno if duplicating fails. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, result;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        result = nfd;
        if (dup2(term_fd, nfd) < 0)
                result = -errno;

        close_nointr_nofail(term_fd);

        return result;
}
269
270 static bool is_terminal_input(ExecInput i) {
271         return
272                 i == EXEC_INPUT_TTY ||
273                 i == EXEC_INPUT_TTY_FORCE ||
274                 i == EXEC_INPUT_TTY_FAIL;
275 }
276
277 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
278
279         if (is_terminal_input(std_input) && !apply_tty_stdin)
280                 return EXEC_INPUT_NULL;
281
282         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
283                 return EXEC_INPUT_NULL;
284
285         return std_input;
286 }
287
288 static int fixup_output(ExecOutput std_output, int socket_fd) {
289
290         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
291                 return EXEC_OUTPUT_INHERIT;
292
293         return std_output;
294 }
295
296 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
297         ExecInput i;
298
299         assert(context);
300
301         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
302
303         switch (i) {
304
305         case EXEC_INPUT_NULL:
306                 return open_null_as(O_RDONLY, STDIN_FILENO);
307
308         case EXEC_INPUT_TTY:
309         case EXEC_INPUT_TTY_FORCE:
310         case EXEC_INPUT_TTY_FAIL: {
311                 int fd, r;
312
313                 fd = acquire_terminal(tty_path(context),
314                                       i == EXEC_INPUT_TTY_FAIL,
315                                       i == EXEC_INPUT_TTY_FORCE,
316                                       false,
317                                       (usec_t) -1);
318                 if (fd < 0)
319                         return fd;
320
321                 if (fd != STDIN_FILENO) {
322                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
323                         close_nointr_nofail(fd);
324                 } else
325                         r = STDIN_FILENO;
326
327                 return r;
328         }
329
330         case EXEC_INPUT_SOCKET:
331                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
332
333         default:
334                 assert_not_reached("Unknown input type");
335         }
336 }
337
338 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
339         ExecOutput o;
340         ExecInput i;
341         int r;
342
343         assert(context);
344         assert(ident);
345
346         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
347         o = fixup_output(context->std_output, socket_fd);
348
349         if (fileno == STDERR_FILENO) {
350                 ExecOutput e;
351                 e = fixup_output(context->std_error, socket_fd);
352
353                 /* This expects the input and output are already set up */
354
355                 /* Don't change the stderr file descriptor if we inherit all
356                  * the way and are not on a tty */
357                 if (e == EXEC_OUTPUT_INHERIT &&
358                     o == EXEC_OUTPUT_INHERIT &&
359                     i == EXEC_INPUT_NULL &&
360                     !is_terminal_input(context->std_input) &&
361                     getppid () != 1)
362                         return fileno;
363
364                 /* Duplicate from stdout if possible */
365                 if (e == o || e == EXEC_OUTPUT_INHERIT)
366                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
367
368                 o = e;
369
370         } else if (o == EXEC_OUTPUT_INHERIT) {
371                 /* If input got downgraded, inherit the original value */
372                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
373                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
374
375                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
376                 if (i != EXEC_INPUT_NULL)
377                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
378
379                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
380                 if (getppid() != 1)
381                         return fileno;
382
383                 /* We need to open /dev/null here anew, to get the right access mode. */
384                 return open_null_as(O_WRONLY, fileno);
385         }
386
387         switch (o) {
388
389         case EXEC_OUTPUT_NULL:
390                 return open_null_as(O_WRONLY, fileno);
391
392         case EXEC_OUTPUT_TTY:
393                 if (is_terminal_input(i))
394                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
395
396                 /* We don't reset the terminal if this is just about output */
397                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
398
399         case EXEC_OUTPUT_SYSLOG:
400         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
401         case EXEC_OUTPUT_KMSG:
402         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
403         case EXEC_OUTPUT_JOURNAL:
404         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
405                 r = connect_logger_as(context, o, ident, unit_id, fileno);
406                 if (r < 0) {
407                         log_struct_unit(LOG_CRIT, unit_id,
408                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
409                                 fileno == STDOUT_FILENO ? "out" : "err",
410                                 unit_id, strerror(-r),
411                                 "ERRNO=%d", -r,
412                                 NULL);
413                         r = open_null_as(O_WRONLY, fileno);
414                 }
415                 return r;
416
417         case EXEC_OUTPUT_SOCKET:
418                 assert(socket_fd >= 0);
419                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
420
421         default:
422                 assert_not_reached("Unknown error type");
423         }
424 }
425
426 static int chown_terminal(int fd, uid_t uid) {
427         struct stat st;
428
429         assert(fd >= 0);
430
431         /* This might fail. What matters are the results. */
432         (void) fchown(fd, uid, -1);
433         (void) fchmod(fd, TTY_MODE);
434
435         if (fstat(fd, &st) < 0)
436                 return -errno;
437
438         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
439                 return -EPERM;
440
441         return 0;
442 }
443
444 static int setup_confirm_stdio(int *_saved_stdin,
445                                int *_saved_stdout) {
446         int fd = -1, saved_stdin, saved_stdout = -1, r;
447
448         assert(_saved_stdin);
449         assert(_saved_stdout);
450
451         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
452         if (saved_stdin < 0)
453                 return -errno;
454
455         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
456         if (saved_stdout < 0) {
457                 r = errno;
458                 goto fail;
459         }
460
461         fd = acquire_terminal(
462                         "/dev/console",
463                         false,
464                         false,
465                         false,
466                         DEFAULT_CONFIRM_USEC);
467         if (fd < 0) {
468                 r = fd;
469                 goto fail;
470         }
471
472         r = chown_terminal(fd, getuid());
473         if (r < 0)
474                 goto fail;
475
476         if (dup2(fd, STDIN_FILENO) < 0) {
477                 r = -errno;
478                 goto fail;
479         }
480
481         if (dup2(fd, STDOUT_FILENO) < 0) {
482                 r = -errno;
483                 goto fail;
484         }
485
486         if (fd >= 2)
487                 close_nointr_nofail(fd);
488
489         *_saved_stdin = saved_stdin;
490         *_saved_stdout = saved_stdout;
491
492         return 0;
493
494 fail:
495         if (saved_stdout >= 0)
496                 close_nointr_nofail(saved_stdout);
497
498         if (saved_stdin >= 0)
499                 close_nointr_nofail(saved_stdin);
500
501         if (fd >= 0)
502                 close_nointr_nofail(fd);
503
504         return r;
505 }
506
507 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
508         int fd;
509         va_list ap;
510
511         assert(format);
512
513         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
514         if (fd < 0)
515                 return fd;
516
517         va_start(ap, format);
518         vdprintf(fd, format, ap);
519         va_end(ap);
520
521         close_nointr_nofail(fd);
522
523         return 0;
524 }
525
/* Releases the terminal and puts the saved stdin/stdout descriptors
 * back in place, closing the saved copies afterwards. Negative saved
 * descriptors are skipped.
 *
 * Returns 0 on success, or -errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        bool have_stdin, have_stdout;
        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* First put both descriptors back ... */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        /* ... then drop the saved copies. */
        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return result;
}
552
/* Asks on the console whether the command line given by argv shall be
 * executed. The answer character ('y', 'n' or 's') is stored in
 * *response by ask().
 *
 * Returns the result of ask(), or a negative error if the console
 * could not be set up or memory ran out. Once the console was set up,
 * stdin/stdout are restored on every exit path. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* stdio is redirected at this point, so it must be put
                 * back (and the saved fds closed) before bailing out. */
                r = -ENOMEM;
                goto finish;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

finish:
        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
572
573 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
574         bool keep_groups = false;
575         int r;
576
577         assert(context);
578
579         /* Lookup and set GID and supplementary group list. Here too
580          * we avoid NSS lookups for gid=0. */
581
582         if (context->group || username) {
583
584                 if (context->group) {
585                         const char *g = context->group;
586
587                         if ((r = get_group_creds(&g, &gid)) < 0)
588                                 return r;
589                 }
590
591                 /* First step, initialize groups from /etc/groups */
592                 if (username && gid != 0) {
593                         if (initgroups(username, gid) < 0)
594                                 return -errno;
595
596                         keep_groups = true;
597                 }
598
599                 /* Second step, set our gids */
600                 if (setresgid(gid, gid, gid) < 0)
601                         return -errno;
602         }
603
604         if (context->supplementary_groups) {
605                 int ngroups_max, k;
606                 gid_t *gids;
607                 char **i;
608
609                 /* Final step, initialize any manually set supplementary groups */
610                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
611
612                 if (!(gids = new(gid_t, ngroups_max)))
613                         return -ENOMEM;
614
615                 if (keep_groups) {
616                         if ((k = getgroups(ngroups_max, gids)) < 0) {
617                                 free(gids);
618                                 return -errno;
619                         }
620                 } else
621                         k = 0;
622
623                 STRV_FOREACH(i, context->supplementary_groups) {
624                         const char *g;
625
626                         if (k >= ngroups_max) {
627                                 free(gids);
628                                 return -E2BIG;
629                         }
630
631                         g = *i;
632                         r = get_group_creds(&g, gids+k);
633                         if (r < 0) {
634                                 free(gids);
635                                 return r;
636                         }
637
638                         k++;
639                 }
640
641                 if (setgroups(k, gids) < 0) {
642                         free(gids);
643                         return -errno;
644                 }
645
646                 free(gids);
647         }
648
649         return 0;
650 }
651
652 static int enforce_user(const ExecContext *context, uid_t uid) {
653         assert(context);
654
655         /* Sets (but doesn't lookup) the uid and make sure we keep the
656          * capabilities while doing so. */
657
658         if (context->capabilities) {
659                 _cleanup_cap_free_ cap_t d = NULL;
660                 static const cap_value_t bits[] = {
661                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
662                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
663                 };
664
665                 /* First step: If we need to keep capabilities but
666                  * drop privileges we need to make sure we keep our
667                  * caps, while we drop privileges. */
668                 if (uid != 0) {
669                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
670
671                         if (prctl(PR_GET_SECUREBITS) != sb)
672                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
673                                         return -errno;
674                 }
675
676                 /* Second step: set the capabilities. This will reduce
677                  * the capabilities to the minimum we need. */
678
679                 d = cap_dup(context->capabilities);
680                 if (!d)
681                         return -errno;
682
683                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
684                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
685                         return -errno;
686
687                 if (cap_set_proc(d) < 0)
688                         return -errno;
689         }
690
691         /* Third step: actually set the uids */
692         if (setresuid(uid, uid, uid) < 0)
693                 return -errno;
694
695         /* At this point we should have all necessary capabilities but
696            are otherwise a normal user. However, the caps might got
697            corrupted due to the setresuid() so we need clean them up
698            later. This is done outside of this call. */
699
700         return 0;
701 }
702
703 #ifdef HAVE_PAM
704
705 static int null_conv(
706                 int num_msg,
707                 const struct pam_message **msg,
708                 struct pam_response **resp,
709                 void *appdata_ptr) {
710
711         /* We don't support conversations */
712
713         return PAM_CONV_ERR;
714 }
715
716 static int setup_pam(
717                 const char *name,
718                 const char *user,
719                 uid_t uid,
720                 const char *tty,
721                 char ***pam_env,
722                 int fds[], unsigned n_fds) {
723
724         static const struct pam_conv conv = {
725                 .conv = null_conv,
726                 .appdata_ptr = NULL
727         };
728
729         pam_handle_t *handle = NULL;
730         sigset_t ss, old_ss;
731         int pam_code = PAM_SUCCESS;
732         int err;
733         char **e = NULL;
734         bool close_session = false;
735         pid_t pam_pid = 0, parent_pid;
736         int flags = 0;
737
738         assert(name);
739         assert(user);
740         assert(pam_env);
741
742         /* We set up PAM in the parent process, then fork. The child
743          * will then stay around until killed via PR_GET_PDEATHSIG or
744          * systemd via the cgroup logic. It will then remove the PAM
745          * session again. The parent process will exec() the actual
746          * daemon. We do things this way to ensure that the main PID
747          * of the daemon is the one we initially fork()ed. */
748
749         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
750                 flags |= PAM_SILENT;
751
752         pam_code = pam_start(name, user, &conv, &handle);
753         if (pam_code != PAM_SUCCESS) {
754                 handle = NULL;
755                 goto fail;
756         }
757
758         if (tty) {
759                 pam_code = pam_set_item(handle, PAM_TTY, tty);
760                 if (pam_code != PAM_SUCCESS)
761                         goto fail;
762         }
763
764         pam_code = pam_acct_mgmt(handle, flags);
765         if (pam_code != PAM_SUCCESS)
766                 goto fail;
767
768         pam_code = pam_open_session(handle, flags);
769         if (pam_code != PAM_SUCCESS)
770                 goto fail;
771
772         close_session = true;
773
774         e = pam_getenvlist(handle);
775         if (!e) {
776                 pam_code = PAM_BUF_ERR;
777                 goto fail;
778         }
779
780         /* Block SIGTERM, so that we know that it won't get lost in
781          * the child */
782         if (sigemptyset(&ss) < 0 ||
783             sigaddset(&ss, SIGTERM) < 0 ||
784             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
785                 goto fail;
786
787         parent_pid = getpid();
788
789         pam_pid = fork();
790         if (pam_pid < 0)
791                 goto fail;
792
793         if (pam_pid == 0) {
794                 int sig;
795                 int r = EXIT_PAM;
796
797                 /* The child's job is to reset the PAM session on
798                  * termination */
799
800                 /* This string must fit in 10 chars (i.e. the length
801                  * of "/sbin/init"), to look pretty in /bin/ps */
802                 rename_process("(sd-pam)");
803
804                 /* Make sure we don't keep open the passed fds in this
805                 child. We assume that otherwise only those fds are
806                 open here that have been opened by PAM. */
807                 close_many(fds, n_fds);
808
809                 /* Drop privileges - we don't need any to pam_close_session
810                  * and this will make PR_SET_PDEATHSIG work in most cases.
811                  * If this fails, ignore the error - but expect sd-pam threads
812                  * to fail to exit normally */
813                 if (setresuid(uid, uid, uid) < 0)
814                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
815
816                 /* Wait until our parent died. This will only work if
817                  * the above setresuid() succeeds, otherwise the kernel
818                  * will not allow unprivileged parents kill their privileged
819                  * children this way. We rely on the control groups kill logic
820                  * to do the rest for us. */
821                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
822                         goto child_finish;
823
824                 /* Check if our parent process might already have
825                  * died? */
826                 if (getppid() == parent_pid) {
827                         for (;;) {
828                                 if (sigwait(&ss, &sig) < 0) {
829                                         if (errno == EINTR)
830                                                 continue;
831
832                                         goto child_finish;
833                                 }
834
835                                 assert(sig == SIGTERM);
836                                 break;
837                         }
838                 }
839
840                 /* If our parent died we'll end the session */
841                 if (getppid() != parent_pid) {
842                         pam_code = pam_close_session(handle, flags);
843                         if (pam_code != PAM_SUCCESS)
844                                 goto child_finish;
845                 }
846
847                 r = 0;
848
849         child_finish:
850                 pam_end(handle, pam_code | flags);
851                 _exit(r);
852         }
853
854         /* If the child was forked off successfully it will do all the
855          * cleanups, so forget about the handle here. */
856         handle = NULL;
857
858         /* Unblock SIGTERM again in the parent */
859         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
860                 goto fail;
861
862         /* We close the log explicitly here, since the PAM modules
863          * might have opened it, but we don't want this fd around. */
864         closelog();
865
866         *pam_env = e;
867         e = NULL;
868
869         return 0;
870
871 fail:
872         if (pam_code != PAM_SUCCESS) {
873                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
874                 err = -EPERM;  /* PAM errors do not map to errno */
875         } else {
876                 log_error("PAM failed: %m");
877                 err = -errno;
878         }
879
880         if (handle) {
881                 if (close_session)
882                         pam_code = pam_close_session(handle, flags);
883
884                 pam_end(handle, pam_code | flags);
885         }
886
887         strv_free(e);
888
889         closelog();
890
891         if (pam_pid > 1) {
892                 kill(pam_pid, SIGTERM);
893                 kill(pam_pid, SIGCONT);
894         }
895
896         return err;
897 }
898 #endif
899
/* Renames the process to "(<name>)", where <name> is the last path
 * component of path, cut down to its final 8 characters if longer.
 * If the component is empty, "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char label[11];
        const char *tail;
        size_t len;

        /* The resulting string must fit in 10 chars (i.e. the length
         * of "/sbin/init") to look pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        len = strlen(tail);
        if (len > 8) {
                /* Keep the end of the name: it is usually the more
                 * interesting part, as the beginning might just be
                 * "systemd-" */
                tail += len - 8;
                len = 8;
        }

        label[0] = '(';
        memcpy(label + 1, tail, len);
        label[len + 1] = ')';
        label[len + 2] = '\0';

        rename_process(label);
}
930
931 static int apply_seccomp(uint32_t *syscall_filter) {
932         static const struct sock_filter header[] = {
933                 VALIDATE_ARCHITECTURE,
934                 EXAMINE_SYSCALL
935         };
936         static const struct sock_filter footer[] = {
937                 _KILL_PROCESS
938         };
939
940         int i;
941         unsigned n;
942         struct sock_filter *f;
943         struct sock_fprog prog = {};
944
945         assert(syscall_filter);
946
947         /* First: count the syscalls to check for */
948         for (i = 0, n = 0; i < syscall_max(); i++)
949                 if (syscall_filter[i >> 4] & (1 << (i & 31)))
950                         n++;
951
952         /* Second: build the filter program from a header the syscall
953          * matches and the footer */
954         f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
955         memcpy(f, header, sizeof(header));
956
957         for (i = 0, n = 0; i < syscall_max(); i++)
958                 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
959                         struct sock_filter item[] = {
960                                 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
961                                 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
962                         };
963
964                         assert_cc(ELEMENTSOF(item) == 2);
965
966                         f[ELEMENTSOF(header) + 2*n]  = item[0];
967                         f[ELEMENTSOF(header) + 2*n+1] = item[1];
968
969                         n++;
970                 }
971
972         memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
973
974         /* Third: install the filter */
975         prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
976         prog.filter = f;
977         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
978                 return -errno;
979
980         return 0;
981 }
982
983 static void do_idle_pipe_dance(int idle_pipe[4]) {
984         assert(idle_pipe);
985
986         if (idle_pipe[1] >= 0)
987                 close_nointr_nofail(idle_pipe[1]);
988         if (idle_pipe[2] >= 0)
989                 close_nointr_nofail(idle_pipe[2]);
990
991         if (idle_pipe[0] >= 0) {
992                 int r;
993
994                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
995
996                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
997                         /* Signal systemd that we are bored and want to continue. */
998                         write(idle_pipe[3], "x", 1);
999
1000                         /* Wait for systemd to react to the signal above. */
1001                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1002                 }
1003
1004                 close_nointr_nofail(idle_pipe[0]);
1005
1006         }
1007
1008         if (idle_pipe[3] >= 0)
1009                 close_nointr_nofail(idle_pipe[3]);
1010 }
1011
1012 static int build_environment(
1013                 ExecContext *c,
1014                 unsigned n_fds,
1015                 usec_t watchdog_usec,
1016                 const char *home,
1017                 const char *username,
1018                 const char *shell,
1019                 char ***ret) {
1020
1021         _cleanup_strv_free_ char **our_env = NULL;
1022         unsigned n_env = 0;
1023         char *x;
1024
1025         assert(c);
1026         assert(ret);
1027
1028         our_env = new0(char*, 10);
1029         if (!our_env)
1030                 return -ENOMEM;
1031
1032         if (n_fds > 0) {
1033                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1034                         return -ENOMEM;
1035                 our_env[n_env++] = x;
1036
1037                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1038                         return -ENOMEM;
1039                 our_env[n_env++] = x;
1040         }
1041
1042         if (watchdog_usec > 0) {
1043                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1044                         return -ENOMEM;
1045                 our_env[n_env++] = x;
1046
1047                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1048                         return -ENOMEM;
1049                 our_env[n_env++] = x;
1050         }
1051
1052         if (home) {
1053                 x = strappend("HOME=", home);
1054                 if (!x)
1055                         return -ENOMEM;
1056                 our_env[n_env++] = x;
1057         }
1058
1059         if (username) {
1060                 x = strappend("LOGNAME=", username);
1061                 if (!x)
1062                         return -ENOMEM;
1063                 our_env[n_env++] = x;
1064
1065                 x = strappend("USER=", username);
1066                 if (!x)
1067                         return -ENOMEM;
1068                 our_env[n_env++] = x;
1069         }
1070
1071         if (shell) {
1072                 x = strappend("SHELL=", shell);
1073                 if (!x)
1074                         return -ENOMEM;
1075                 our_env[n_env++] = x;
1076         }
1077
1078         if (is_terminal_input(c->std_input) ||
1079             c->std_output == EXEC_OUTPUT_TTY ||
1080             c->std_error == EXEC_OUTPUT_TTY ||
1081             c->tty_path) {
1082
1083                 x = strdup(default_term_for_tty(tty_path(c)));
1084                 if (!x)
1085                         return -ENOMEM;
1086                 our_env[n_env++] = x;
1087         }
1088
1089         our_env[n_env++] = NULL;
1090         assert(n_env <= 10);
1091
1092         *ret = our_env;
1093         our_env = NULL;
1094
1095         return 0;
1096 }
1097
1098 int exec_spawn(ExecCommand *command,
1099                char **argv,
1100                ExecContext *context,
1101                int fds[], unsigned n_fds,
1102                char **environment,
1103                bool apply_permissions,
1104                bool apply_chroot,
1105                bool apply_tty_stdin,
1106                bool confirm_spawn,
1107                CGroupControllerMask cgroup_supported,
1108                const char *cgroup_path,
1109                const char *unit_id,
1110                usec_t watchdog_usec,
1111                int idle_pipe[4],
1112                ExecRuntime *runtime,
1113                pid_t *ret) {
1114
1115         _cleanup_strv_free_ char **files_env = NULL;
1116         int socket_fd;
1117         char *line;
1118         pid_t pid;
1119         int r;
1120
1121         assert(command);
1122         assert(context);
1123         assert(ret);
1124         assert(fds || n_fds <= 0);
1125
1126         if (context->std_input == EXEC_INPUT_SOCKET ||
1127             context->std_output == EXEC_OUTPUT_SOCKET ||
1128             context->std_error == EXEC_OUTPUT_SOCKET) {
1129
1130                 if (n_fds != 1)
1131                         return -EINVAL;
1132
1133                 socket_fd = fds[0];
1134
1135                 fds = NULL;
1136                 n_fds = 0;
1137         } else
1138                 socket_fd = -1;
1139
1140         r = exec_context_load_environment(context, &files_env);
1141         if (r < 0) {
1142                 log_struct_unit(LOG_ERR,
1143                            unit_id,
1144                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1145                            "ERRNO=%d", -r,
1146                            NULL);
1147                 return r;
1148         }
1149
1150         if (!argv)
1151                 argv = command->argv;
1152
1153         line = exec_command_line(argv);
1154         if (!line)
1155                 return log_oom();
1156
1157         log_struct_unit(LOG_DEBUG,
1158                         unit_id,
1159                         "EXECUTABLE=%s", command->path,
1160                         "MESSAGE=About to execute: %s", line,
1161                         NULL);
1162         free(line);
1163
1164         pid = fork();
1165         if (pid < 0)
1166                 return -errno;
1167
1168         if (pid == 0) {
1169                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1170                 const char *username = NULL, *home = NULL, *shell = NULL;
1171                 unsigned n_dont_close = 0;
1172                 int dont_close[n_fds + 3];
1173                 uid_t uid = (uid_t) -1;
1174                 gid_t gid = (gid_t) -1;
1175                 sigset_t ss;
1176                 int i, err;
1177
1178                 /* child */
1179
1180                 rename_process_from_path(command->path);
1181
1182                 /* We reset exactly these signals, since they are the
1183                  * only ones we set to SIG_IGN in the main daemon. All
1184                  * others we leave untouched because we set them to
1185                  * SIG_DFL or a valid handler initially, both of which
1186                  * will be demoted to SIG_DFL. */
1187                 default_signals(SIGNALS_CRASH_HANDLER,
1188                                 SIGNALS_IGNORE, -1);
1189
1190                 if (context->ignore_sigpipe)
1191                         ignore_signals(SIGPIPE, -1);
1192
1193                 assert_se(sigemptyset(&ss) == 0);
1194                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1195                         err = -errno;
1196                         r = EXIT_SIGNAL_MASK;
1197                         goto fail_child;
1198                 }
1199
1200                 if (idle_pipe)
1201                         do_idle_pipe_dance(idle_pipe);
1202
1203                 /* Close sockets very early to make sure we don't
1204                  * block init reexecution because it cannot bind its
1205                  * sockets */
1206                 log_forget_fds();
1207
1208                 if (socket_fd >= 0)
1209                         dont_close[n_dont_close++] = socket_fd;
1210                 if (n_fds > 0) {
1211                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1212                         n_dont_close += n_fds;
1213                 }
1214                 if (runtime) {
1215                         if (runtime->netns_storage_socket[0] >= 0)
1216                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1217                         if (runtime->netns_storage_socket[1] >= 0)
1218                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1219                 }
1220
1221                 err = close_all_fds(dont_close, n_dont_close);
1222                 if (err < 0) {
1223                         r = EXIT_FDS;
1224                         goto fail_child;
1225                 }
1226
1227                 if (!context->same_pgrp)
1228                         if (setsid() < 0) {
1229                                 err = -errno;
1230                                 r = EXIT_SETSID;
1231                                 goto fail_child;
1232                         }
1233
1234                 if (context->tcpwrap_name) {
1235                         if (socket_fd >= 0)
1236                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1237                                         err = -EACCES;
1238                                         r = EXIT_TCPWRAP;
1239                                         goto fail_child;
1240                                 }
1241
1242                         for (i = 0; i < (int) n_fds; i++) {
1243                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1244                                         err = -EACCES;
1245                                         r = EXIT_TCPWRAP;
1246                                         goto fail_child;
1247                                 }
1248                         }
1249                 }
1250
1251                 exec_context_tty_reset(context);
1252
1253                 if (confirm_spawn) {
1254                         char response;
1255
1256                         err = ask_for_confirmation(&response, argv);
1257                         if (err == -ETIMEDOUT)
1258                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1259                         else if (err < 0)
1260                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1261                         else if (response == 's') {
1262                                 write_confirm_message("Skipping execution.\n");
1263                                 err = -ECANCELED;
1264                                 r = EXIT_CONFIRM;
1265                                 goto fail_child;
1266                         } else if (response == 'n') {
1267                                 write_confirm_message("Failing execution.\n");
1268                                 err = r = 0;
1269                                 goto fail_child;
1270                         }
1271                 }
1272
1273                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1274                  * must sure to drop O_NONBLOCK */
1275                 if (socket_fd >= 0)
1276                         fd_nonblock(socket_fd, false);
1277
1278                 err = setup_input(context, socket_fd, apply_tty_stdin);
1279                 if (err < 0) {
1280                         r = EXIT_STDIN;
1281                         goto fail_child;
1282                 }
1283
1284                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1285                 if (err < 0) {
1286                         r = EXIT_STDOUT;
1287                         goto fail_child;
1288                 }
1289
1290                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1291                 if (err < 0) {
1292                         r = EXIT_STDERR;
1293                         goto fail_child;
1294                 }
1295
1296                 if (cgroup_path) {
1297                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1298                         if (err < 0) {
1299                                 r = EXIT_CGROUP;
1300                                 goto fail_child;
1301                         }
1302                 }
1303
1304                 if (context->oom_score_adjust_set) {
1305                         char t[16];
1306
1307                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1308                         char_array_0(t);
1309
1310                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1311                                 err = -errno;
1312                                 r = EXIT_OOM_ADJUST;
1313                                 goto fail_child;
1314                         }
1315                 }
1316
1317                 if (context->nice_set)
1318                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1319                                 err = -errno;
1320                                 r = EXIT_NICE;
1321                                 goto fail_child;
1322                         }
1323
1324                 if (context->cpu_sched_set) {
1325                         struct sched_param param = {
1326                                 .sched_priority = context->cpu_sched_priority,
1327                         };
1328
1329                         r = sched_setscheduler(0,
1330                                                context->cpu_sched_policy |
1331                                                (context->cpu_sched_reset_on_fork ?
1332                                                 SCHED_RESET_ON_FORK : 0),
1333                                                &param);
1334                         if (r < 0) {
1335                                 err = -errno;
1336                                 r = EXIT_SETSCHEDULER;
1337                                 goto fail_child;
1338                         }
1339                 }
1340
1341                 if (context->cpuset)
1342                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1343                                 err = -errno;
1344                                 r = EXIT_CPUAFFINITY;
1345                                 goto fail_child;
1346                         }
1347
1348                 if (context->ioprio_set)
1349                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1350                                 err = -errno;
1351                                 r = EXIT_IOPRIO;
1352                                 goto fail_child;
1353                         }
1354
1355                 if (context->timer_slack_nsec != (nsec_t) -1)
1356                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1357                                 err = -errno;
1358                                 r = EXIT_TIMERSLACK;
1359                                 goto fail_child;
1360                         }
1361
1362                 if (context->utmp_id)
1363                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1364
1365                 if (context->user) {
1366                         username = context->user;
1367                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1368                         if (err < 0) {
1369                                 r = EXIT_USER;
1370                                 goto fail_child;
1371                         }
1372
1373                         if (is_terminal_input(context->std_input)) {
1374                                 err = chown_terminal(STDIN_FILENO, uid);
1375                                 if (err < 0) {
1376                                         r = EXIT_STDIN;
1377                                         goto fail_child;
1378                                 }
1379                         }
1380                 }
1381
1382 #ifdef HAVE_PAM
1383                 if (cgroup_path && context->user && context->pam_name) {
1384                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1385                         if (err < 0) {
1386                                 r = EXIT_CGROUP;
1387                                 goto fail_child;
1388                         }
1389
1390
1391                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1392                         if (err < 0) {
1393                                 r = EXIT_CGROUP;
1394                                 goto fail_child;
1395                         }
1396                 }
1397 #endif
1398
1399                 if (apply_permissions) {
1400                         err = enforce_groups(context, username, gid);
1401                         if (err < 0) {
1402                                 r = EXIT_GROUP;
1403                                 goto fail_child;
1404                         }
1405                 }
1406
1407                 umask(context->umask);
1408
1409 #ifdef HAVE_PAM
1410                 if (apply_permissions && context->pam_name && username) {
1411                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1412                         if (err < 0) {
1413                                 r = EXIT_PAM;
1414                                 goto fail_child;
1415                         }
1416                 }
1417 #endif
1418                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1419                         err = setup_netns(runtime->netns_storage_socket);
1420                         if (err < 0) {
1421                                 r = EXIT_NETWORK;
1422                                 goto fail_child;
1423                         }
1424                 }
1425
1426                 if (!strv_isempty(context->read_write_dirs) ||
1427                     !strv_isempty(context->read_only_dirs) ||
1428                     !strv_isempty(context->inaccessible_dirs) ||
1429                     context->mount_flags != 0 ||
1430                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) {
1431
1432                         char *tmp = NULL, *var = NULL;
1433
1434                         /* The runtime struct only contains the parent
1435                          * of the private /tmp, which is
1436                          * non-accessible to world users. Inside of it
1437                          * there's a /tmp that is sticky, and that's
1438                          * the one we want to use here. */
1439
1440                         if (context->private_tmp && runtime) {
1441                                 if (runtime->tmp_dir)
1442                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1443                                 if (runtime->var_tmp_dir)
1444                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1445                         }
1446
1447                         err = setup_namespace(
1448                                         context->read_write_dirs,
1449                                         context->read_only_dirs,
1450                                         context->inaccessible_dirs,
1451                                         tmp,
1452                                         var,
1453                                         context->mount_flags);
1454
1455                         if (err < 0) {
1456                                 r = EXIT_NAMESPACE;
1457                                 goto fail_child;
1458                         }
1459                 }
1460
1461                 if (apply_chroot) {
1462                         if (context->root_directory)
1463                                 if (chroot(context->root_directory) < 0) {
1464                                         err = -errno;
1465                                         r = EXIT_CHROOT;
1466                                         goto fail_child;
1467                                 }
1468
1469                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1470                                 err = -errno;
1471                                 r = EXIT_CHDIR;
1472                                 goto fail_child;
1473                         }
1474                 } else {
1475                         _cleanup_free_ char *d = NULL;
1476
1477                         if (asprintf(&d, "%s/%s",
1478                                      context->root_directory ? context->root_directory : "",
1479                                      context->working_directory ? context->working_directory : "") < 0) {
1480                                 err = -ENOMEM;
1481                                 r = EXIT_MEMORY;
1482                                 goto fail_child;
1483                         }
1484
1485                         if (chdir(d) < 0) {
1486                                 err = -errno;
1487                                 r = EXIT_CHDIR;
1488                                 goto fail_child;
1489                         }
1490                 }
1491
1492                 /* We repeat the fd closing here, to make sure that
1493                  * nothing is leaked from the PAM modules */
1494                 err = close_all_fds(fds, n_fds);
1495                 if (err >= 0)
1496                         err = shift_fds(fds, n_fds);
1497                 if (err >= 0)
1498                         err = flags_fds(fds, n_fds, context->non_blocking);
1499                 if (err < 0) {
1500                         r = EXIT_FDS;
1501                         goto fail_child;
1502                 }
1503
1504                 if (apply_permissions) {
1505
1506                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1507                                 if (!context->rlimit[i])
1508                                         continue;
1509
1510                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1511                                         err = -errno;
1512                                         r = EXIT_LIMITS;
1513                                         goto fail_child;
1514                                 }
1515                         }
1516
1517                         if (context->capability_bounding_set_drop) {
1518                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1519                                 if (err < 0) {
1520                                         r = EXIT_CAPABILITIES;
1521                                         goto fail_child;
1522                                 }
1523                         }
1524
1525                         if (context->user) {
1526                                 err = enforce_user(context, uid);
1527                                 if (err < 0) {
1528                                         r = EXIT_USER;
1529                                         goto fail_child;
1530                                 }
1531                         }
1532
1533                         /* PR_GET_SECUREBITS is not privileged, while
1534                          * PR_SET_SECUREBITS is. So to suppress
1535                          * potential EPERMs we'll try not to call
1536                          * PR_SET_SECUREBITS unless necessary. */
1537                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1538                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1539                                         err = -errno;
1540                                         r = EXIT_SECUREBITS;
1541                                         goto fail_child;
1542                                 }
1543
1544                         if (context->capabilities)
1545                                 if (cap_set_proc(context->capabilities) < 0) {
1546                                         err = -errno;
1547                                         r = EXIT_CAPABILITIES;
1548                                         goto fail_child;
1549                                 }
1550
1551                         if (context->no_new_privileges)
1552                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1553                                         err = -errno;
1554                                         r = EXIT_NO_NEW_PRIVILEGES;
1555                                         goto fail_child;
1556                                 }
1557
1558                         if (context->syscall_filter) {
1559                                 err = apply_seccomp(context->syscall_filter);
1560                                 if (err < 0) {
1561                                         r = EXIT_SECCOMP;
1562                                         goto fail_child;
1563                                 }
1564                         }
1565                 }
1566
1567                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1568                 if (r < 0) {
1569                         r = EXIT_MEMORY;
1570                         goto fail_child;
1571                 }
1572
1573                 final_env = strv_env_merge(5,
1574                                            environment,
1575                                            our_env,
1576                                            context->environment,
1577                                            files_env,
1578                                            pam_env,
1579                                            NULL);
1580                 if (!final_env) {
1581                         err = -ENOMEM;
1582                         r = EXIT_MEMORY;
1583                         goto fail_child;
1584                 }
1585
1586                 final_argv = replace_env_argv(argv, final_env);
1587                 if (!final_argv) {
1588                         err = -ENOMEM;
1589                         r = EXIT_MEMORY;
1590                         goto fail_child;
1591                 }
1592
1593                 final_env = strv_env_clean(final_env);
1594
1595                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1596                         line = exec_command_line(final_argv);
1597                         if (line) {
1598                                 log_open();
1599                                 log_struct_unit(LOG_DEBUG,
1600                                                 unit_id,
1601                                                 "EXECUTABLE=%s", command->path,
1602                                                 "MESSAGE=Executing: %s", line,
1603                                                 NULL);
1604                                 log_close();
1605                                 free(line);
1606                                 line = NULL;
1607                         }
1608                 }
1609                 execve(command->path, final_argv, final_env);
1610                 err = -errno;
1611                 r = EXIT_EXEC;
1612
1613         fail_child:
1614                 if (r != 0) {
1615                         log_open();
1616                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1617                                    "EXECUTABLE=%s", command->path,
1618                                    "MESSAGE=Failed at step %s spawning %s: %s",
1619                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1620                                           command->path, strerror(-err),
1621                                    "ERRNO=%d", -err,
1622                                    NULL);
1623                         log_close();
1624                 }
1625
1626                 _exit(r);
1627         }
1628
1629         log_struct_unit(LOG_DEBUG,
1630                         unit_id,
1631                         "MESSAGE=Forked %s as "PID_FMT,
1632                         command->path, pid,
1633                         NULL);
1634
1635         /* We add the new process to the cgroup both in the child (so
1636          * that we can be sure that no user code is ever executed
1637          * outside of the cgroup) and in the parent (so that we can be
1638          * sure that when we kill the cgroup the process will be
1639          * killed too). */
1640         if (cgroup_path)
1641                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1642
1643         exec_status_start(&command->exec_status, pid);
1644
1645         *ret = pid;
1646         return 0;
1647 }
1648
1649 void exec_context_init(ExecContext *c) {
1650         assert(c);
1651
1652         c->umask = 0022;
1653         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1654         c->cpu_sched_policy = SCHED_OTHER;
1655         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1656         c->syslog_level_prefix = true;
1657         c->ignore_sigpipe = true;
1658         c->timer_slack_nsec = (nsec_t) -1;
1659 }
1660
1661 void exec_context_done(ExecContext *c) {
1662         unsigned l;
1663
1664         assert(c);
1665
1666         strv_free(c->environment);
1667         c->environment = NULL;
1668
1669         strv_free(c->environment_files);
1670         c->environment_files = NULL;
1671
1672         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1673                 free(c->rlimit[l]);
1674                 c->rlimit[l] = NULL;
1675         }
1676
1677         free(c->working_directory);
1678         c->working_directory = NULL;
1679         free(c->root_directory);
1680         c->root_directory = NULL;
1681
1682         free(c->tty_path);
1683         c->tty_path = NULL;
1684
1685         free(c->tcpwrap_name);
1686         c->tcpwrap_name = NULL;
1687
1688         free(c->syslog_identifier);
1689         c->syslog_identifier = NULL;
1690
1691         free(c->user);
1692         c->user = NULL;
1693
1694         free(c->group);
1695         c->group = NULL;
1696
1697         strv_free(c->supplementary_groups);
1698         c->supplementary_groups = NULL;
1699
1700         free(c->pam_name);
1701         c->pam_name = NULL;
1702
1703         if (c->capabilities) {
1704                 cap_free(c->capabilities);
1705                 c->capabilities = NULL;
1706         }
1707
1708         strv_free(c->read_only_dirs);
1709         c->read_only_dirs = NULL;
1710
1711         strv_free(c->read_write_dirs);
1712         c->read_write_dirs = NULL;
1713
1714         strv_free(c->inaccessible_dirs);
1715         c->inaccessible_dirs = NULL;
1716
1717         if (c->cpuset)
1718                 CPU_FREE(c->cpuset);
1719
1720         free(c->utmp_id);
1721         c->utmp_id = NULL;
1722
1723         free(c->syscall_filter);
1724         c->syscall_filter = NULL;
1725 }
1726
1727 void exec_command_done(ExecCommand *c) {
1728         assert(c);
1729
1730         free(c->path);
1731         c->path = NULL;
1732
1733         strv_free(c->argv);
1734         c->argv = NULL;
1735 }
1736
1737 void exec_command_done_array(ExecCommand *c, unsigned n) {
1738         unsigned i;
1739
1740         for (i = 0; i < n; i++)
1741                 exec_command_done(c+i);
1742 }
1743
1744 void exec_command_free_list(ExecCommand *c) {
1745         ExecCommand *i;
1746
1747         while ((i = c)) {
1748                 LIST_REMOVE(command, c, i);
1749                 exec_command_done(i);
1750                 free(i);
1751         }
1752 }
1753
1754 void exec_command_free_array(ExecCommand **c, unsigned n) {
1755         unsigned i;
1756
1757         for (i = 0; i < n; i++) {
1758                 exec_command_free_list(c[i]);
1759                 c[i] = NULL;
1760         }
1761 }
1762
1763 int exec_context_load_environment(const ExecContext *c, char ***l) {
1764         char **i, **r = NULL;
1765
1766         assert(c);
1767         assert(l);
1768
1769         STRV_FOREACH(i, c->environment_files) {
1770                 char *fn;
1771                 int k;
1772                 bool ignore = false;
1773                 char **p;
1774                 _cleanup_globfree_ glob_t pglob = {};
1775                 int count, n;
1776
1777                 fn = *i;
1778
1779                 if (fn[0] == '-') {
1780                         ignore = true;
1781                         fn ++;
1782                 }
1783
1784                 if (!path_is_absolute(fn)) {
1785                         if (ignore)
1786                                 continue;
1787
1788                         strv_free(r);
1789                         return -EINVAL;
1790                 }
1791
1792                 /* Filename supports globbing, take all matching files */
1793                 errno = 0;
1794                 if (glob(fn, 0, NULL, &pglob) != 0) {
1795                         if (ignore)
1796                                 continue;
1797
1798                         strv_free(r);
1799                         return errno ? -errno : -EINVAL;
1800                 }
1801                 count = pglob.gl_pathc;
1802                 if (count == 0) {
1803                         if (ignore)
1804                                 continue;
1805
1806                         strv_free(r);
1807                         return -EINVAL;
1808                 }
1809                 for (n = 0; n < count; n++) {
1810                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1811                         if (k < 0) {
1812                                 if (ignore)
1813                                         continue;
1814
1815                                 strv_free(r);
1816                                 return k;
1817                         }
1818                         /* Log invalid environment variables with filename */
1819                         if (p)
1820                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1821
1822                         if (r == NULL)
1823                                 r = p;
1824                         else {
1825                                 char **m;
1826
1827                                 m = strv_env_merge(2, r, p);
1828                                 strv_free(r);
1829                                 strv_free(p);
1830                                 if (!m)
1831                                         return -ENOMEM;
1832
1833                                 r = m;
1834                         }
1835                 }
1836         }
1837
1838         *l = r;
1839
1840         return 0;
1841 }
1842
/* Checks whether the specified TTY might be the device /dev/console
 * currently points to. An optional "/dev/" prefix of the name is
 * ignored. Returns true when in doubt, i.e. when the console cannot
 * be resolved. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        const char *name = tty;
        bool same;

        if (startswith(name, "/dev/"))
                name += 5;

        /* The console trivially matches itself */
        if (streq(name, "console"))
                return true;

        /* If the console cannot be resolved, assume it may match */
        console = resolve_dev_console(&active);
        if (!console)
                return true;

        if (streq(console, name))
                same = true;
        else
                /* "tty0" refers to the active VC, hence any VC may be it at times */
                same = streq(console, "tty0") && tty_is_vc(name);

        free(active);

        return same;
}
1865
1866 bool exec_context_may_touch_console(ExecContext *ec) {
1867         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1868                 is_terminal_input(ec->std_input) ||
1869                 is_terminal_output(ec->std_output) ||
1870                 is_terminal_output(ec->std_error)) &&
1871                tty_may_match_dev_console(tty_path(ec));
1872 }
1873
/* Writes every string of the vector l to the stream f, each preceded
 * by a single space. A NULL vector results in no output. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        for (item = l; item && *item; item++)
                fprintf(f, " %s", *item);
}
1882
1883 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1884         char **e;
1885         unsigned i;
1886
1887         assert(c);
1888         assert(f);
1889
1890         prefix = strempty(prefix);
1891
1892         fprintf(f,
1893                 "%sUMask: %04o\n"
1894                 "%sWorkingDirectory: %s\n"
1895                 "%sRootDirectory: %s\n"
1896                 "%sNonBlocking: %s\n"
1897                 "%sPrivateTmp: %s\n"
1898                 "%sPrivateNetwork: %s\n"
1899                 "%sIgnoreSIGPIPE: %s\n",
1900                 prefix, c->umask,
1901                 prefix, c->working_directory ? c->working_directory : "/",
1902                 prefix, c->root_directory ? c->root_directory : "/",
1903                 prefix, yes_no(c->non_blocking),
1904                 prefix, yes_no(c->private_tmp),
1905                 prefix, yes_no(c->private_network),
1906                 prefix, yes_no(c->ignore_sigpipe));
1907
1908         STRV_FOREACH(e, c->environment)
1909                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1910
1911         STRV_FOREACH(e, c->environment_files)
1912                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1913
1914         if (c->tcpwrap_name)
1915                 fprintf(f,
1916                         "%sTCPWrapName: %s\n",
1917                         prefix, c->tcpwrap_name);
1918
1919         if (c->nice_set)
1920                 fprintf(f,
1921                         "%sNice: %i\n",
1922                         prefix, c->nice);
1923
1924         if (c->oom_score_adjust_set)
1925                 fprintf(f,
1926                         "%sOOMScoreAdjust: %i\n",
1927                         prefix, c->oom_score_adjust);
1928
1929         for (i = 0; i < RLIM_NLIMITS; i++)
1930                 if (c->rlimit[i])
1931                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1932
1933         if (c->ioprio_set) {
1934                 char *class_str;
1935                 int r;
1936
1937                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1938                 if (r < 0)
1939                         class_str = NULL;
1940                 fprintf(f,
1941                         "%sIOSchedulingClass: %s\n"
1942                         "%sIOPriority: %i\n",
1943                         prefix, strna(class_str),
1944                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1945                 free(class_str);
1946         }
1947
1948         if (c->cpu_sched_set) {
1949                 char *policy_str;
1950                 int r;
1951
1952                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1953                 if (r < 0)
1954                         policy_str = NULL;
1955                 fprintf(f,
1956                         "%sCPUSchedulingPolicy: %s\n"
1957                         "%sCPUSchedulingPriority: %i\n"
1958                         "%sCPUSchedulingResetOnFork: %s\n",
1959                         prefix, strna(policy_str),
1960                         prefix, c->cpu_sched_priority,
1961                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1962                 free(policy_str);
1963         }
1964
1965         if (c->cpuset) {
1966                 fprintf(f, "%sCPUAffinity:", prefix);
1967                 for (i = 0; i < c->cpuset_ncpus; i++)
1968                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1969                                 fprintf(f, " %u", i);
1970                 fputs("\n", f);
1971         }
1972
1973         if (c->timer_slack_nsec != (nsec_t) -1)
1974                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
1975
1976         fprintf(f,
1977                 "%sStandardInput: %s\n"
1978                 "%sStandardOutput: %s\n"
1979                 "%sStandardError: %s\n",
1980                 prefix, exec_input_to_string(c->std_input),
1981                 prefix, exec_output_to_string(c->std_output),
1982                 prefix, exec_output_to_string(c->std_error));
1983
1984         if (c->tty_path)
1985                 fprintf(f,
1986                         "%sTTYPath: %s\n"
1987                         "%sTTYReset: %s\n"
1988                         "%sTTYVHangup: %s\n"
1989                         "%sTTYVTDisallocate: %s\n",
1990                         prefix, c->tty_path,
1991                         prefix, yes_no(c->tty_reset),
1992                         prefix, yes_no(c->tty_vhangup),
1993                         prefix, yes_no(c->tty_vt_disallocate));
1994
1995         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
1996             c->std_output == EXEC_OUTPUT_KMSG ||
1997             c->std_output == EXEC_OUTPUT_JOURNAL ||
1998             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
1999             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2000             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2001             c->std_error == EXEC_OUTPUT_SYSLOG ||
2002             c->std_error == EXEC_OUTPUT_KMSG ||
2003             c->std_error == EXEC_OUTPUT_JOURNAL ||
2004             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2005             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2006             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2007
2008                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2009
2010                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2011                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2012
2013                 fprintf(f,
2014                         "%sSyslogFacility: %s\n"
2015                         "%sSyslogLevel: %s\n",
2016                         prefix, strna(fac_str),
2017                         prefix, strna(lvl_str));
2018         }
2019
2020         if (c->capabilities) {
2021                 _cleanup_cap_free_charp_ char *t;
2022
2023                 t = cap_to_text(c->capabilities, NULL);
2024                 if (t)
2025                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2026         }
2027
2028         if (c->secure_bits)
2029                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2030                         prefix,
2031                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2032                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2033                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2034                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2035                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2036                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2037
2038         if (c->capability_bounding_set_drop) {
2039                 unsigned long l;
2040                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2041
2042                 for (l = 0; l <= cap_last_cap(); l++)
2043                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2044                                 _cleanup_cap_free_charp_ char *t;
2045
2046                                 t = cap_to_name(l);
2047                                 if (t)
2048                                         fprintf(f, " %s", t);
2049                         }
2050
2051                 fputs("\n", f);
2052         }
2053
2054         if (c->user)
2055                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2056         if (c->group)
2057                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2058
2059         if (strv_length(c->supplementary_groups) > 0) {
2060                 fprintf(f, "%sSupplementaryGroups:", prefix);
2061                 strv_fprintf(f, c->supplementary_groups);
2062                 fputs("\n", f);
2063         }
2064
2065         if (c->pam_name)
2066                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2067
2068         if (strv_length(c->read_write_dirs) > 0) {
2069                 fprintf(f, "%sReadWriteDirs:", prefix);
2070                 strv_fprintf(f, c->read_write_dirs);
2071                 fputs("\n", f);
2072         }
2073
2074         if (strv_length(c->read_only_dirs) > 0) {
2075                 fprintf(f, "%sReadOnlyDirs:", prefix);
2076                 strv_fprintf(f, c->read_only_dirs);
2077                 fputs("\n", f);
2078         }
2079
2080         if (strv_length(c->inaccessible_dirs) > 0) {
2081                 fprintf(f, "%sInaccessibleDirs:", prefix);
2082                 strv_fprintf(f, c->inaccessible_dirs);
2083                 fputs("\n", f);
2084         }
2085
2086         if (c->utmp_id)
2087                 fprintf(f,
2088                         "%sUtmpIdentifier: %s\n",
2089                         prefix, c->utmp_id);
2090 }
2091
2092 void exec_status_start(ExecStatus *s, pid_t pid) {
2093         assert(s);
2094
2095         zero(*s);
2096         s->pid = pid;
2097         dual_timestamp_get(&s->start_timestamp);
2098 }
2099
2100 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2101         assert(s);
2102
2103         if (s->pid && s->pid != pid)
2104                 zero(*s);
2105
2106         s->pid = pid;
2107         dual_timestamp_get(&s->exit_timestamp);
2108
2109         s->code = code;
2110         s->status = status;
2111
2112         if (context) {
2113                 if (context->utmp_id)
2114                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2115
2116                 exec_context_tty_reset(context);
2117         }
2118 }
2119
2120 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2121         char buf[FORMAT_TIMESTAMP_MAX];
2122
2123         assert(s);
2124         assert(f);
2125
2126         if (!prefix)
2127                 prefix = "";
2128
2129         if (s->pid <= 0)
2130                 return;
2131
2132         fprintf(f,
2133                 "%sPID: "PID_FMT"\n",
2134                 prefix, s->pid);
2135
2136         if (s->start_timestamp.realtime > 0)
2137                 fprintf(f,
2138                         "%sStart Timestamp: %s\n",
2139                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2140
2141         if (s->exit_timestamp.realtime > 0)
2142                 fprintf(f,
2143                         "%sExit Timestamp: %s\n"
2144                         "%sExit Code: %s\n"
2145                         "%sExit Status: %i\n",
2146                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2147                         prefix, sigchld_code_to_string(s->code),
2148                         prefix, s->status);
2149 }
2150
2151 char *exec_command_line(char **argv) {
2152         size_t k;
2153         char *n, *p, **a;
2154         bool first = true;
2155
2156         assert(argv);
2157
2158         k = 1;
2159         STRV_FOREACH(a, argv)
2160                 k += strlen(*a)+3;
2161
2162         if (!(n = new(char, k)))
2163                 return NULL;
2164
2165         p = n;
2166         STRV_FOREACH(a, argv) {
2167
2168                 if (!first)
2169                         *(p++) = ' ';
2170                 else
2171                         first = false;
2172
2173                 if (strpbrk(*a, WHITESPACE)) {
2174                         *(p++) = '\'';
2175                         p = stpcpy(p, *a);
2176                         *(p++) = '\'';
2177                 } else
2178                         p = stpcpy(p, *a);
2179
2180         }
2181
2182         *p = 0;
2183
2184         /* FIXME: this doesn't really handle arguments that have
2185          * spaces and ticks in them */
2186
2187         return n;
2188 }
2189
2190 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2191         char *p2;
2192         const char *prefix2;
2193
2194         char *cmd;
2195
2196         assert(c);
2197         assert(f);
2198
2199         if (!prefix)
2200                 prefix = "";
2201         p2 = strappend(prefix, "\t");
2202         prefix2 = p2 ? p2 : prefix;
2203
2204         cmd = exec_command_line(c->argv);
2205
2206         fprintf(f,
2207                 "%sCommand Line: %s\n",
2208                 prefix, cmd ? cmd : strerror(ENOMEM));
2209
2210         free(cmd);
2211
2212         exec_status_dump(&c->exec_status, f, prefix2);
2213
2214         free(p2);
2215 }
2216
2217 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2218         assert(f);
2219
2220         if (!prefix)
2221                 prefix = "";
2222
2223         LIST_FOREACH(command, c, c)
2224                 exec_command_dump(c, f, prefix);
2225 }
2226
2227 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2228         ExecCommand *end;
2229
2230         assert(l);
2231         assert(e);
2232
2233         if (*l) {
2234                 /* It's kind of important, that we keep the order here */
2235                 LIST_FIND_TAIL(command, *l, end);
2236                 LIST_INSERT_AFTER(command, *l, end, e);
2237         } else
2238               *l = e;
2239 }
2240
2241 int exec_command_set(ExecCommand *c, const char *path, ...) {
2242         va_list ap;
2243         char **l, *p;
2244
2245         assert(c);
2246         assert(path);
2247
2248         va_start(ap, path);
2249         l = strv_new_ap(path, ap);
2250         va_end(ap);
2251
2252         if (!l)
2253                 return -ENOMEM;
2254
2255         p = strdup(path);
2256         if (!p) {
2257                 strv_free(l);
2258                 return -ENOMEM;
2259         }
2260
2261         free(c->path);
2262         c->path = p;
2263
2264         strv_free(c->argv);
2265         c->argv = l;
2266
2267         return 0;
2268 }
2269
2270 static int exec_runtime_allocate(ExecRuntime **rt) {
2271
2272         if (*rt)
2273                 return 0;
2274
2275         *rt = new0(ExecRuntime, 1);
2276         if (!*rt)
2277                 return -ENOMEM;
2278
2279         (*rt)->n_ref = 1;
2280         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2281
2282         return 0;
2283 }
2284
2285 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2286         int r;
2287
2288         assert(rt);
2289         assert(c);
2290         assert(id);
2291
2292         if (*rt)
2293                 return 1;
2294
2295         if (!c->private_network && !c->private_tmp)
2296                 return 0;
2297
2298         r = exec_runtime_allocate(rt);
2299         if (r < 0)
2300                 return r;
2301
2302         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2303                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2304                         return -errno;
2305         }
2306
2307         if (c->private_tmp && !(*rt)->tmp_dir) {
2308                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2309                 if (r < 0)
2310                         return r;
2311         }
2312
2313         return 1;
2314 }
2315
2316 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2317         assert(r);
2318         assert(r->n_ref > 0);
2319
2320         r->n_ref++;
2321         return r;
2322 }
2323
2324 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2325
2326         if (!r)
2327                 return NULL;
2328
2329         assert(r->n_ref > 0);
2330
2331         r->n_ref--;
2332         if (r->n_ref <= 0) {
2333                 free(r->tmp_dir);
2334                 free(r->var_tmp_dir);
2335                 close_pipe(r->netns_storage_socket);
2336                 free(r);
2337         }
2338
2339         return NULL;
2340 }
2341
2342 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2343         assert(u);
2344         assert(f);
2345         assert(fds);
2346
2347         if (!rt)
2348                 return 0;
2349
2350         if (rt->tmp_dir)
2351                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2352
2353         if (rt->var_tmp_dir)
2354                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2355
2356         if (rt->netns_storage_socket[0] >= 0) {
2357                 int copy;
2358
2359                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2360                 if (copy < 0)
2361                         return copy;
2362
2363                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2364         }
2365
2366         if (rt->netns_storage_socket[1] >= 0) {
2367                 int copy;
2368
2369                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2370                 if (copy < 0)
2371                         return copy;
2372
2373                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2374         }
2375
2376         return 0;
2377 }
2378
2379 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2380         int r;
2381
2382         assert(rt);
2383         assert(key);
2384         assert(value);
2385
2386         if (streq(key, "tmp-dir")) {
2387                 char *copy;
2388
2389                 r = exec_runtime_allocate(rt);
2390                 if (r < 0)
2391                         return r;
2392
2393                 copy = strdup(value);
2394                 if (!copy)
2395                         return log_oom();
2396
2397                 free((*rt)->tmp_dir);
2398                 (*rt)->tmp_dir = copy;
2399
2400         } else if (streq(key, "var-tmp-dir")) {
2401                 char *copy;
2402
2403                 r = exec_runtime_allocate(rt);
2404                 if (r < 0)
2405                         return r;
2406
2407                 copy = strdup(value);
2408                 if (!copy)
2409                         return log_oom();
2410
2411                 free((*rt)->var_tmp_dir);
2412                 (*rt)->var_tmp_dir = copy;
2413
2414         } else if (streq(key, "netns-socket-0")) {
2415                 int fd;
2416
2417                 r = exec_runtime_allocate(rt);
2418                 if (r < 0)
2419                         return r;
2420
2421                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2422                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2423                 else {
2424                         if ((*rt)->netns_storage_socket[0] >= 0)
2425                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2426
2427                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2428                 }
2429         } else if (streq(key, "netns-socket-1")) {
2430                 int fd;
2431
2432                 r = exec_runtime_allocate(rt);
2433                 if (r < 0)
2434                         return r;
2435
2436                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2437                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2438                 else {
2439                         if ((*rt)->netns_storage_socket[1] >= 0)
2440                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2441
2442                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2443                 }
2444         } else
2445                 return 0;
2446
2447         return 1;
2448 }
2449
/* Job function handed to asynchronous_job(): removes the directory
 * tree whose path is passed in p, then frees the path string. Always
 * returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *dir = p;

        rm_rf_dangerous(dir, false, true, false);
        free(dir);

        return NULL;
}
2456
2457 void exec_runtime_destroy(ExecRuntime *rt) {
2458         if (!rt)
2459                 return;
2460
2461         /* If there are multiple users of this, let's leave the stuff around */
2462         if (rt->n_ref > 1)
2463                 return;
2464
2465         if (rt->tmp_dir) {
2466                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2467                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2468                 rt->tmp_dir = NULL;
2469         }
2470
2471         if (rt->var_tmp_dir) {
2472                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2473                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2474                 rt->var_tmp_dir = NULL;
2475         }
2476
2477         close_pipe(rt->netns_storage_socket);
2478 }
2479
/* Names of the ExecInput values, indexed by the enum value. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably this generates the lookup functions for the
 * table above, such as the exec_input_to_string() used by
 * exec_context_dump() -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2489
/* Names of the ExecOutput values, indexed by the enum value. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably this generates the lookup functions for the
 * table above, such as the exec_output_to_string() used by
 * exec_context_dump() -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);