chiark / gitweb /
smack: introduce new SmackProcessLabel option
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
88 #include "label.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
/* Relocates the passed file descriptors so that fds[i] ends up being fd
 * number i+3, i.e. the descriptors form a consecutive range starting right
 * after stdin/stdout/stderr. The array is modified in place. Returns 0 on
 * success, or a negative errno-style error if duplicating an fd fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        /* Keep making passes over the array until a pass manages to put
         * every descriptor on its target slot. */
        while (first >= 0) {
                int retry = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, moved;

                        /* Nothing to do if it already sits where we want it */
                        if (fds[idx] == target)
                                continue;

                        moved = fcntl(fds[idx], F_DUPFD, target);
                        if (moved < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = moved;

                        /* The target slot was still occupied, so we got
                         * some higher fd instead. Remember the first such
                         * index and start the next pass from there. */
                        if (moved != target && retry < 0)
                                retry = idx;
                }

                first = retry;
        }

        return 0;
}
145
/* Sets or clears O_NONBLOCK (according to 'nonblock') on each of the passed
 * fds and always clears FD_CLOEXEC on them. Returns 0 on success, or the
 * first negative error reported by one of the helpers. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be handed over to the child we execute. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
172
173 _pure_ static const char *tty_path(const ExecContext *context) {
174         assert(context);
175
176         if (context->tty_path)
177                 return context->tty_path;
178
179         return "/dev/console";
180 }
181
182 static void exec_context_tty_reset(const ExecContext *context) {
183         assert(context);
184
185         if (context->tty_vhangup)
186                 terminal_vhangup(tty_path(context));
187
188         if (context->tty_reset)
189                 reset_terminal(tty_path(context));
190
191         if (context->tty_vt_disallocate && context->tty_path)
192                 vt_disallocate(context->tty_path);
193 }
194
195 static bool is_terminal_output(ExecOutput o) {
196         return
197                 o == EXEC_OUTPUT_TTY ||
198                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
201 }
202
/* Opens /dev/null with the given open() flags (plus O_NOCTTY) and installs
 * it as file descriptor 'nfd'. Returns nfd on success, or a negative
 * errno-style error. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() already handed us the requested number */
        if (null_fd == nfd)
                return nfd;

        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return result;
}
220
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
222         int fd, r;
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227
228         assert(context);
229         assert(output < _EXEC_OUTPUT_MAX);
230         assert(ident);
231         assert(nfd >= 0);
232
233         fd = socket(AF_UNIX, SOCK_STREAM, 0);
234         if (fd < 0)
235                 return -errno;
236
237         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
238         if (r < 0) {
239                 safe_close(fd);
240                 return -errno;
241         }
242
243         if (shutdown(fd, SHUT_RD) < 0) {
244                 safe_close(fd);
245                 return -errno;
246         }
247
248         fd_inc_sndbuf(fd, SNDBUF_SIZE);
249
250         dprintf(fd,
251                 "%s\n"
252                 "%s\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n"
257                 "%i\n",
258                 context->syslog_identifier ? context->syslog_identifier : ident,
259                 unit_id,
260                 context->syslog_priority,
261                 !!context->syslog_level_prefix,
262                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264                 is_terminal_output(output));
265
266         if (fd != nfd) {
267                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268                 safe_close(fd);
269         } else
270                 r = nfd;
271
272         return r;
273 }
/* Opens the terminal at 'path' with the given mode (plus O_NOCTTY) and
 * installs it as file descriptor 'nfd'. Returns nfd on success, or a
 * negative error. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty, result;

        assert(path);
        assert(nfd >= 0);

        tty = open_terminal(path, mode | O_NOCTTY);
        if (tty < 0)
                return tty;

        /* Already on the requested number, nothing to move */
        if (tty == nfd)
                return nfd;

        result = dup2(tty, nfd) < 0 ? -errno : nfd;
        safe_close(tty);

        return result;
}
291
292 static bool is_terminal_input(ExecInput i) {
293         return
294                 i == EXEC_INPUT_TTY ||
295                 i == EXEC_INPUT_TTY_FORCE ||
296                 i == EXEC_INPUT_TTY_FAIL;
297 }
298
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
300
301         if (is_terminal_input(std_input) && !apply_tty_stdin)
302                 return EXEC_INPUT_NULL;
303
304         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305                 return EXEC_INPUT_NULL;
306
307         return std_input;
308 }
309
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
311
312         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313                 return EXEC_OUTPUT_INHERIT;
314
315         return std_output;
316 }
317
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
319         ExecInput i;
320
321         assert(context);
322
323         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324
325         switch (i) {
326
327         case EXEC_INPUT_NULL:
328                 return open_null_as(O_RDONLY, STDIN_FILENO);
329
330         case EXEC_INPUT_TTY:
331         case EXEC_INPUT_TTY_FORCE:
332         case EXEC_INPUT_TTY_FAIL: {
333                 int fd, r;
334
335                 fd = acquire_terminal(tty_path(context),
336                                       i == EXEC_INPUT_TTY_FAIL,
337                                       i == EXEC_INPUT_TTY_FORCE,
338                                       false,
339                                       USEC_INFINITY);
340                 if (fd < 0)
341                         return fd;
342
343                 if (fd != STDIN_FILENO) {
344                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
345                         safe_close(fd);
346                 } else
347                         r = STDIN_FILENO;
348
349                 return r;
350         }
351
352         case EXEC_INPUT_SOCKET:
353                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
354
355         default:
356                 assert_not_reached("Unknown input type");
357         }
358 }
359
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
361         ExecOutput o;
362         ExecInput i;
363         int r;
364
365         assert(context);
366         assert(ident);
367
368         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369         o = fixup_output(context->std_output, socket_fd);
370
371         if (fileno == STDERR_FILENO) {
372                 ExecOutput e;
373                 e = fixup_output(context->std_error, socket_fd);
374
375                 /* This expects the input and output are already set up */
376
377                 /* Don't change the stderr file descriptor if we inherit all
378                  * the way and are not on a tty */
379                 if (e == EXEC_OUTPUT_INHERIT &&
380                     o == EXEC_OUTPUT_INHERIT &&
381                     i == EXEC_INPUT_NULL &&
382                     !is_terminal_input(context->std_input) &&
383                     getppid () != 1)
384                         return fileno;
385
386                 /* Duplicate from stdout if possible */
387                 if (e == o || e == EXEC_OUTPUT_INHERIT)
388                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389
390                 o = e;
391
392         } else if (o == EXEC_OUTPUT_INHERIT) {
393                 /* If input got downgraded, inherit the original value */
394                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
396
397                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398                 if (i != EXEC_INPUT_NULL)
399                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
400
401                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402                 if (getppid() != 1)
403                         return fileno;
404
405                 /* We need to open /dev/null here anew, to get the right access mode. */
406                 return open_null_as(O_WRONLY, fileno);
407         }
408
409         switch (o) {
410
411         case EXEC_OUTPUT_NULL:
412                 return open_null_as(O_WRONLY, fileno);
413
414         case EXEC_OUTPUT_TTY:
415                 if (is_terminal_input(i))
416                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
417
418                 /* We don't reset the terminal if this is just about output */
419                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
420
421         case EXEC_OUTPUT_SYSLOG:
422         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423         case EXEC_OUTPUT_KMSG:
424         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425         case EXEC_OUTPUT_JOURNAL:
426         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427                 r = connect_logger_as(context, o, ident, unit_id, fileno);
428                 if (r < 0) {
429                         log_struct_unit(LOG_CRIT, unit_id,
430                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
431                                 fileno == STDOUT_FILENO ? "out" : "err",
432                                 unit_id, strerror(-r),
433                                 "ERRNO=%d", -r,
434                                 NULL);
435                         r = open_null_as(O_WRONLY, fileno);
436                 }
437                 return r;
438
439         case EXEC_OUTPUT_SOCKET:
440                 assert(socket_fd >= 0);
441                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
442
443         default:
444                 assert_not_reached("Unknown error type");
445         }
446 }
447
448 static int chown_terminal(int fd, uid_t uid) {
449         struct stat st;
450
451         assert(fd >= 0);
452
453         /* This might fail. What matters are the results. */
454         (void) fchown(fd, uid, -1);
455         (void) fchmod(fd, TTY_MODE);
456
457         if (fstat(fd, &st) < 0)
458                 return -errno;
459
460         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
461                 return -EPERM;
462
463         return 0;
464 }
465
466 static int setup_confirm_stdio(int *_saved_stdin,
467                                int *_saved_stdout) {
468         int fd = -1, saved_stdin, saved_stdout = -1, r;
469
470         assert(_saved_stdin);
471         assert(_saved_stdout);
472
473         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474         if (saved_stdin < 0)
475                 return -errno;
476
477         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
478         if (saved_stdout < 0) {
479                 r = errno;
480                 goto fail;
481         }
482
483         fd = acquire_terminal(
484                         "/dev/console",
485                         false,
486                         false,
487                         false,
488                         DEFAULT_CONFIRM_USEC);
489         if (fd < 0) {
490                 r = fd;
491                 goto fail;
492         }
493
494         r = chown_terminal(fd, getuid());
495         if (r < 0)
496                 goto fail;
497
498         if (dup2(fd, STDIN_FILENO) < 0) {
499                 r = -errno;
500                 goto fail;
501         }
502
503         if (dup2(fd, STDOUT_FILENO) < 0) {
504                 r = -errno;
505                 goto fail;
506         }
507
508         if (fd >= 2)
509                 safe_close(fd);
510
511         *_saved_stdin = saved_stdin;
512         *_saved_stdout = saved_stdout;
513
514         return 0;
515
516 fail:
517         safe_close(saved_stdout);
518         safe_close(saved_stdin);
519         safe_close(fd);
520
521         return r;
522 }
523
524 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
525         _cleanup_close_ int fd = -1;
526         va_list ap;
527
528         assert(format);
529
530         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
531         if (fd < 0)
532                 return fd;
533
534         va_start(ap, format);
535         vdprintf(fd, format, ap);
536         va_end(ap);
537
538         return 0;
539 }
540
/* Releases the terminal and puts the saved stdin/stdout fds (where valid,
 * i.e. >= 0) back in place, then closes the saved copies. Returns 0 on
 * success, or the negative errno of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int result = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                result = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                result = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return result;
}
564
565 static int ask_for_confirmation(char *response, char **argv) {
566         int saved_stdout = -1, saved_stdin = -1, r;
567         _cleanup_free_ char *line = NULL;
568
569         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
570         if (r < 0)
571                 return r;
572
573         line = exec_command_line(argv);
574         if (!line)
575                 return -ENOMEM;
576
577         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
578
579         restore_confirm_stdio(&saved_stdin, &saved_stdout);
580
581         return r;
582 }
583
584 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
585         bool keep_groups = false;
586         int r;
587
588         assert(context);
589
590         /* Lookup and set GID and supplementary group list. Here too
591          * we avoid NSS lookups for gid=0. */
592
593         if (context->group || username) {
594
595                 if (context->group) {
596                         const char *g = context->group;
597
598                         if ((r = get_group_creds(&g, &gid)) < 0)
599                                 return r;
600                 }
601
602                 /* First step, initialize groups from /etc/groups */
603                 if (username && gid != 0) {
604                         if (initgroups(username, gid) < 0)
605                                 return -errno;
606
607                         keep_groups = true;
608                 }
609
610                 /* Second step, set our gids */
611                 if (setresgid(gid, gid, gid) < 0)
612                         return -errno;
613         }
614
615         if (context->supplementary_groups) {
616                 int ngroups_max, k;
617                 gid_t *gids;
618                 char **i;
619
620                 /* Final step, initialize any manually set supplementary groups */
621                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
622
623                 if (!(gids = new(gid_t, ngroups_max)))
624                         return -ENOMEM;
625
626                 if (keep_groups) {
627                         if ((k = getgroups(ngroups_max, gids)) < 0) {
628                                 free(gids);
629                                 return -errno;
630                         }
631                 } else
632                         k = 0;
633
634                 STRV_FOREACH(i, context->supplementary_groups) {
635                         const char *g;
636
637                         if (k >= ngroups_max) {
638                                 free(gids);
639                                 return -E2BIG;
640                         }
641
642                         g = *i;
643                         r = get_group_creds(&g, gids+k);
644                         if (r < 0) {
645                                 free(gids);
646                                 return r;
647                         }
648
649                         k++;
650                 }
651
652                 if (setgroups(k, gids) < 0) {
653                         free(gids);
654                         return -errno;
655                 }
656
657                 free(gids);
658         }
659
660         return 0;
661 }
662
663 static int enforce_user(const ExecContext *context, uid_t uid) {
664         assert(context);
665
666         /* Sets (but doesn't lookup) the uid and make sure we keep the
667          * capabilities while doing so. */
668
669         if (context->capabilities) {
670                 _cleanup_cap_free_ cap_t d = NULL;
671                 static const cap_value_t bits[] = {
672                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
673                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
674                 };
675
676                 /* First step: If we need to keep capabilities but
677                  * drop privileges we need to make sure we keep our
678                  * caps, while we drop privileges. */
679                 if (uid != 0) {
680                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
681
682                         if (prctl(PR_GET_SECUREBITS) != sb)
683                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
684                                         return -errno;
685                 }
686
687                 /* Second step: set the capabilities. This will reduce
688                  * the capabilities to the minimum we need. */
689
690                 d = cap_dup(context->capabilities);
691                 if (!d)
692                         return -errno;
693
694                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
695                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
696                         return -errno;
697
698                 if (cap_set_proc(d) < 0)
699                         return -errno;
700         }
701
702         /* Third step: actually set the uids */
703         if (setresuid(uid, uid, uid) < 0)
704                 return -errno;
705
706         /* At this point we should have all necessary capabilities but
707            are otherwise a normal user. However, the caps might got
708            corrupted due to the setresuid() so we need clean them up
709            later. This is done outside of this call. */
710
711         return 0;
712 }
713
714 #ifdef HAVE_PAM
715
716 static int null_conv(
717                 int num_msg,
718                 const struct pam_message **msg,
719                 struct pam_response **resp,
720                 void *appdata_ptr) {
721
722         /* We don't support conversations */
723
724         return PAM_CONV_ERR;
725 }
726
727 static int setup_pam(
728                 const char *name,
729                 const char *user,
730                 uid_t uid,
731                 const char *tty,
732                 char ***pam_env,
733                 int fds[], unsigned n_fds) {
734
735         static const struct pam_conv conv = {
736                 .conv = null_conv,
737                 .appdata_ptr = NULL
738         };
739
740         pam_handle_t *handle = NULL;
741         sigset_t ss, old_ss;
742         int pam_code = PAM_SUCCESS;
743         int err;
744         char **e = NULL;
745         bool close_session = false;
746         pid_t pam_pid = 0, parent_pid;
747         int flags = 0;
748
749         assert(name);
750         assert(user);
751         assert(pam_env);
752
753         /* We set up PAM in the parent process, then fork. The child
754          * will then stay around until killed via PR_GET_PDEATHSIG or
755          * systemd via the cgroup logic. It will then remove the PAM
756          * session again. The parent process will exec() the actual
757          * daemon. We do things this way to ensure that the main PID
758          * of the daemon is the one we initially fork()ed. */
759
760         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
761                 flags |= PAM_SILENT;
762
763         pam_code = pam_start(name, user, &conv, &handle);
764         if (pam_code != PAM_SUCCESS) {
765                 handle = NULL;
766                 goto fail;
767         }
768
769         if (tty) {
770                 pam_code = pam_set_item(handle, PAM_TTY, tty);
771                 if (pam_code != PAM_SUCCESS)
772                         goto fail;
773         }
774
775         pam_code = pam_acct_mgmt(handle, flags);
776         if (pam_code != PAM_SUCCESS)
777                 goto fail;
778
779         pam_code = pam_open_session(handle, flags);
780         if (pam_code != PAM_SUCCESS)
781                 goto fail;
782
783         close_session = true;
784
785         e = pam_getenvlist(handle);
786         if (!e) {
787                 pam_code = PAM_BUF_ERR;
788                 goto fail;
789         }
790
791         /* Block SIGTERM, so that we know that it won't get lost in
792          * the child */
793         if (sigemptyset(&ss) < 0 ||
794             sigaddset(&ss, SIGTERM) < 0 ||
795             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
796                 goto fail;
797
798         parent_pid = getpid();
799
800         pam_pid = fork();
801         if (pam_pid < 0)
802                 goto fail;
803
804         if (pam_pid == 0) {
805                 int sig;
806                 int r = EXIT_PAM;
807
808                 /* The child's job is to reset the PAM session on
809                  * termination */
810
811                 /* This string must fit in 10 chars (i.e. the length
812                  * of "/sbin/init"), to look pretty in /bin/ps */
813                 rename_process("(sd-pam)");
814
815                 /* Make sure we don't keep open the passed fds in this
816                 child. We assume that otherwise only those fds are
817                 open here that have been opened by PAM. */
818                 close_many(fds, n_fds);
819
820                 /* Drop privileges - we don't need any to pam_close_session
821                  * and this will make PR_SET_PDEATHSIG work in most cases.
822                  * If this fails, ignore the error - but expect sd-pam threads
823                  * to fail to exit normally */
824                 if (setresuid(uid, uid, uid) < 0)
825                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
826
827                 /* Wait until our parent died. This will only work if
828                  * the above setresuid() succeeds, otherwise the kernel
829                  * will not allow unprivileged parents kill their privileged
830                  * children this way. We rely on the control groups kill logic
831                  * to do the rest for us. */
832                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
833                         goto child_finish;
834
835                 /* Check if our parent process might already have
836                  * died? */
837                 if (getppid() == parent_pid) {
838                         for (;;) {
839                                 if (sigwait(&ss, &sig) < 0) {
840                                         if (errno == EINTR)
841                                                 continue;
842
843                                         goto child_finish;
844                                 }
845
846                                 assert(sig == SIGTERM);
847                                 break;
848                         }
849                 }
850
851                 /* If our parent died we'll end the session */
852                 if (getppid() != parent_pid) {
853                         pam_code = pam_close_session(handle, flags);
854                         if (pam_code != PAM_SUCCESS)
855                                 goto child_finish;
856                 }
857
858                 r = 0;
859
860         child_finish:
861                 pam_end(handle, pam_code | flags);
862                 _exit(r);
863         }
864
865         /* If the child was forked off successfully it will do all the
866          * cleanups, so forget about the handle here. */
867         handle = NULL;
868
869         /* Unblock SIGTERM again in the parent */
870         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
871                 goto fail;
872
873         /* We close the log explicitly here, since the PAM modules
874          * might have opened it, but we don't want this fd around. */
875         closelog();
876
877         *pam_env = e;
878         e = NULL;
879
880         return 0;
881
882 fail:
883         if (pam_code != PAM_SUCCESS) {
884                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
885                 err = -EPERM;  /* PAM errors do not map to errno */
886         } else {
887                 log_error("PAM failed: %m");
888                 err = -errno;
889         }
890
891         if (handle) {
892                 if (close_session)
893                         pam_code = pam_close_session(handle, flags);
894
895                 pam_end(handle, pam_code | flags);
896         }
897
898         strv_free(e);
899
900         closelog();
901
902         if (pam_pid > 1) {
903                 kill(pam_pid, SIGTERM);
904                 kill(pam_pid, SIGCONT);
905         }
906
907         return err;
908 }
909 #endif
910
/* Renames the current process to the basename of 'path' wrapped in
 * parentheses. Only the last 8 characters of the basename are used, so the
 * result fits in 10 chars (i.e. the length of "/sbin/init") and looks
 * pretty in /bin/ps. An empty basename yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *base;
        char *out = process_name;
        size_t len, skip;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* Keep the tail: the end of the name is usually the more
         * interesting part, as the start may just be "systemd-" */
        len = strlen(base);
        skip = len > 8 ? len - 8 : 0;
        len -= skip;

        *out++ = '(';
        memcpy(out, base + skip, len);
        out += len;
        *out++ = ')';
        *out = 0;

        rename_process(process_name);
}
941
942 #ifdef HAVE_SECCOMP
943
944 static int apply_seccomp(const ExecContext *c) {
945         uint32_t negative_action, action;
946         scmp_filter_ctx *seccomp;
947         Iterator i;
948         void *id;
949         int r;
950
951         assert(c);
952
953         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
954
955         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
956         if (!seccomp)
957                 return -ENOMEM;
958
959         if (c->syscall_archs) {
960
961                 SET_FOREACH(id, c->syscall_archs, i) {
962                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
963                         if (r == -EEXIST)
964                                 continue;
965                         if (r < 0)
966                                 goto finish;
967                 }
968
969         } else {
970                 r = seccomp_add_secondary_archs(seccomp);
971                 if (r < 0)
972                         goto finish;
973         }
974
975         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
976         SET_FOREACH(id, c->syscall_filter, i) {
977                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
978                 if (r < 0)
979                         goto finish;
980         }
981
982         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
983         if (r < 0)
984                 goto finish;
985
986         r = seccomp_load(seccomp);
987
988 finish:
989         seccomp_release(seccomp);
990         return r;
991 }
992
993 static int apply_address_families(const ExecContext *c) {
994         scmp_filter_ctx *seccomp;
995         Iterator i;
996         int r;
997
998         assert(c);
999
1000         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1001         if (!seccomp)
1002                 return -ENOMEM;
1003
1004         r = seccomp_add_secondary_archs(seccomp);
1005         if (r < 0)
1006                 goto finish;
1007
1008         if (c->address_families_whitelist) {
1009                 int af, first = 0, last = 0;
1010                 void *afp;
1011
1012                 /* If this is a whitelist, we first block the address
1013                  * families that are out of range and then everything
1014                  * that is not in the set. First, we find the lowest
1015                  * and highest address family in the set. */
1016
1017                 SET_FOREACH(afp, c->address_families, i) {
1018                         af = PTR_TO_INT(afp);
1019
1020                         if (af <= 0 || af >= af_max())
1021                                 continue;
1022
1023                         if (first == 0 || af < first)
1024                                 first = af;
1025
1026                         if (last == 0 || af > last)
1027                                 last = af;
1028                 }
1029
1030                 assert((first == 0) == (last == 0));
1031
1032                 if (first == 0) {
1033
1034                         /* No entries in the valid range, block everything */
1035                         r = seccomp_rule_add(
1036                                         seccomp,
1037                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1038                                         SCMP_SYS(socket),
1039                                         0);
1040                         if (r < 0)
1041                                 goto finish;
1042
1043                 } else {
1044
1045                         /* Block everything below the first entry */
1046                         r = seccomp_rule_add(
1047                                         seccomp,
1048                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1049                                         SCMP_SYS(socket),
1050                                         1,
1051                                         SCMP_A0(SCMP_CMP_LT, first));
1052                         if (r < 0)
1053                                 goto finish;
1054
1055                         /* Block everything above the last entry */
1056                         r = seccomp_rule_add(
1057                                         seccomp,
1058                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1059                                         SCMP_SYS(socket),
1060                                         1,
1061                                         SCMP_A0(SCMP_CMP_GT, last));
1062                         if (r < 0)
1063                                 goto finish;
1064
1065                         /* Block everything between the first and last
1066                          * entry */
1067                         for (af = 1; af < af_max(); af++) {
1068
1069                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1070                                         continue;
1071
1072                                 r = seccomp_rule_add(
1073                                                 seccomp,
1074                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1075                                                 SCMP_SYS(socket),
1076                                                 1,
1077                                                 SCMP_A0(SCMP_CMP_EQ, af));
1078                                 if (r < 0)
1079                                         goto finish;
1080                         }
1081                 }
1082
1083         } else {
1084                 void *af;
1085
1086                 /* If this is a blacklist, then generate one rule for
1087                  * each address family that are then combined in OR
1088                  * checks. */
1089
1090                 SET_FOREACH(af, c->address_families, i) {
1091
1092                         r = seccomp_rule_add(
1093                                         seccomp,
1094                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1095                                         SCMP_SYS(socket),
1096                                         1,
1097                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1098                         if (r < 0)
1099                                 goto finish;
1100                 }
1101         }
1102
1103         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1104         if (r < 0)
1105                 goto finish;
1106
1107         r = seccomp_load(seccomp);
1108
1109 finish:
1110         seccomp_release(seccomp);
1111         return r;
1112 }
1113
1114 #endif
1115
1116 static void do_idle_pipe_dance(int idle_pipe[4]) {
1117         assert(idle_pipe);
1118
1119
1120         safe_close(idle_pipe[1]);
1121         safe_close(idle_pipe[2]);
1122
1123         if (idle_pipe[0] >= 0) {
1124                 int r;
1125
1126                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1127
1128                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1129                         /* Signal systemd that we are bored and want to continue. */
1130                         write(idle_pipe[3], "x", 1);
1131
1132                         /* Wait for systemd to react to the signal above. */
1133                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1134                 }
1135
1136                 safe_close(idle_pipe[0]);
1137
1138         }
1139
1140         safe_close(idle_pipe[3]);
1141 }
1142
1143 static int build_environment(
1144                 const ExecContext *c,
1145                 unsigned n_fds,
1146                 usec_t watchdog_usec,
1147                 const char *home,
1148                 const char *username,
1149                 const char *shell,
1150                 char ***ret) {
1151
1152         _cleanup_strv_free_ char **our_env = NULL;
1153         unsigned n_env = 0;
1154         char *x;
1155
1156         assert(c);
1157         assert(ret);
1158
1159         our_env = new0(char*, 10);
1160         if (!our_env)
1161                 return -ENOMEM;
1162
1163         if (n_fds > 0) {
1164                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1165                         return -ENOMEM;
1166                 our_env[n_env++] = x;
1167
1168                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1169                         return -ENOMEM;
1170                 our_env[n_env++] = x;
1171         }
1172
1173         if (watchdog_usec > 0) {
1174                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1175                         return -ENOMEM;
1176                 our_env[n_env++] = x;
1177
1178                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1179                         return -ENOMEM;
1180                 our_env[n_env++] = x;
1181         }
1182
1183         if (home) {
1184                 x = strappend("HOME=", home);
1185                 if (!x)
1186                         return -ENOMEM;
1187                 our_env[n_env++] = x;
1188         }
1189
1190         if (username) {
1191                 x = strappend("LOGNAME=", username);
1192                 if (!x)
1193                         return -ENOMEM;
1194                 our_env[n_env++] = x;
1195
1196                 x = strappend("USER=", username);
1197                 if (!x)
1198                         return -ENOMEM;
1199                 our_env[n_env++] = x;
1200         }
1201
1202         if (shell) {
1203                 x = strappend("SHELL=", shell);
1204                 if (!x)
1205                         return -ENOMEM;
1206                 our_env[n_env++] = x;
1207         }
1208
1209         if (is_terminal_input(c->std_input) ||
1210             c->std_output == EXEC_OUTPUT_TTY ||
1211             c->std_error == EXEC_OUTPUT_TTY ||
1212             c->tty_path) {
1213
1214                 x = strdup(default_term_for_tty(tty_path(c)));
1215                 if (!x)
1216                         return -ENOMEM;
1217                 our_env[n_env++] = x;
1218         }
1219
1220         our_env[n_env++] = NULL;
1221         assert(n_env <= 10);
1222
1223         *ret = our_env;
1224         our_env = NULL;
1225
1226         return 0;
1227 }
1228
1229 static int exec_child(ExecCommand *command,
1230                       const ExecContext *context,
1231                       const ExecParameters *params,
1232                       ExecRuntime *runtime,
1233                       char **argv,
1234                       int socket_fd,
1235                       int *fds, unsigned n_fds,
1236                       char **files_env,
1237                       int *error) {
1238
1239         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1240         const char *username = NULL, *home = NULL, *shell = NULL;
1241         unsigned n_dont_close = 0;
1242         int dont_close[n_fds + 4];
1243         uid_t uid = (uid_t) -1;
1244         gid_t gid = (gid_t) -1;
1245         int i, err;
1246
1247         assert(command);
1248         assert(context);
1249         assert(params);
1250         assert(error);
1251
1252         rename_process_from_path(command->path);
1253
1254         /* We reset exactly these signals, since they are the
1255          * only ones we set to SIG_IGN in the main daemon. All
1256          * others we leave untouched because we set them to
1257          * SIG_DFL or a valid handler initially, both of which
1258          * will be demoted to SIG_DFL. */
1259         default_signals(SIGNALS_CRASH_HANDLER,
1260                         SIGNALS_IGNORE, -1);
1261
1262         if (context->ignore_sigpipe)
1263                 ignore_signals(SIGPIPE, -1);
1264
1265         err = reset_signal_mask();
1266         if (err < 0) {
1267                 *error = EXIT_SIGNAL_MASK;
1268                 return err;
1269         }
1270
1271         if (params->idle_pipe)
1272                 do_idle_pipe_dance(params->idle_pipe);
1273
1274         /* Close sockets very early to make sure we don't
1275          * block init reexecution because it cannot bind its
1276          * sockets */
1277         log_forget_fds();
1278
1279         if (socket_fd >= 0)
1280                 dont_close[n_dont_close++] = socket_fd;
1281         if (n_fds > 0) {
1282                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1283                 n_dont_close += n_fds;
1284         }
1285         if (params->bus_endpoint_fd >= 0)
1286                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1287         if (runtime) {
1288                 if (runtime->netns_storage_socket[0] >= 0)
1289                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1290                 if (runtime->netns_storage_socket[1] >= 0)
1291                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1292         }
1293
1294         err = close_all_fds(dont_close, n_dont_close);
1295         if (err < 0) {
1296                 *error = EXIT_FDS;
1297                 return err;
1298         }
1299
1300         if (!context->same_pgrp)
1301                 if (setsid() < 0) {
1302                         *error = EXIT_SETSID;
1303                         return -errno;
1304                 }
1305
1306         exec_context_tty_reset(context);
1307
1308         if (params->confirm_spawn) {
1309                 char response;
1310
1311                 err = ask_for_confirmation(&response, argv);
1312                 if (err == -ETIMEDOUT)
1313                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1314                 else if (err < 0)
1315                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1316                 else if (response == 's') {
1317                         write_confirm_message("Skipping execution.\n");
1318                         *error = EXIT_CONFIRM;
1319                         return -ECANCELED;
1320                 } else if (response == 'n') {
1321                         write_confirm_message("Failing execution.\n");
1322                         *error = 0;
1323                         return 0;
1324                 }
1325         }
1326
1327         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1328          * must sure to drop O_NONBLOCK */
1329         if (socket_fd >= 0)
1330                 fd_nonblock(socket_fd, false);
1331
1332         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1333         if (err < 0) {
1334                 *error = EXIT_STDIN;
1335                 return err;
1336         }
1337
1338         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1339         if (err < 0) {
1340                 *error = EXIT_STDOUT;
1341                 return err;
1342         }
1343
1344         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1345         if (err < 0) {
1346                 *error = EXIT_STDERR;
1347                 return err;
1348         }
1349
1350         if (params->cgroup_path) {
1351                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1352                 if (err < 0) {
1353                         *error = EXIT_CGROUP;
1354                         return err;
1355                 }
1356         }
1357
1358         if (context->oom_score_adjust_set) {
1359                 char t[16];
1360
1361                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1362                 char_array_0(t);
1363
1364                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1365                         *error = EXIT_OOM_ADJUST;
1366                         return -errno;
1367                 }
1368         }
1369
1370         if (context->nice_set)
1371                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1372                         *error = EXIT_NICE;
1373                         return -errno;
1374                 }
1375
1376         if (context->cpu_sched_set) {
1377                 struct sched_param param = {
1378                         .sched_priority = context->cpu_sched_priority,
1379                 };
1380
1381                 err = sched_setscheduler(0,
1382                                          context->cpu_sched_policy |
1383                                          (context->cpu_sched_reset_on_fork ?
1384                                           SCHED_RESET_ON_FORK : 0),
1385                                          &param);
1386                 if (err < 0) {
1387                         *error = EXIT_SETSCHEDULER;
1388                         return -errno;
1389                 }
1390         }
1391
1392         if (context->cpuset)
1393                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1394                         *error = EXIT_CPUAFFINITY;
1395                         return -errno;
1396                 }
1397
1398         if (context->ioprio_set)
1399                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1400                         *error = EXIT_IOPRIO;
1401                         return -errno;
1402                 }
1403
1404         if (context->timer_slack_nsec != NSEC_INFINITY)
1405                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1406                         *error = EXIT_TIMERSLACK;
1407                         return -errno;
1408                 }
1409
1410         if (context->personality != 0xffffffffUL)
1411                 if (personality(context->personality) < 0) {
1412                         *error = EXIT_PERSONALITY;
1413                         return -errno;
1414                 }
1415
1416         if (context->utmp_id)
1417                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1418
1419         if (context->user) {
1420                 username = context->user;
1421                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1422                 if (err < 0) {
1423                         *error = EXIT_USER;
1424                         return err;
1425                 }
1426
1427                 if (is_terminal_input(context->std_input)) {
1428                         err = chown_terminal(STDIN_FILENO, uid);
1429                         if (err < 0) {
1430                                 *error = EXIT_STDIN;
1431                                 return err;
1432                         }
1433                 }
1434         }
1435
1436 #ifdef ENABLE_KDBUS
1437         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1438                 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1439
1440                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1441                 if (err < 0) {
1442                         *error = EXIT_BUS_ENDPOINT;
1443                         return err;
1444                 }
1445         }
1446 #endif
1447
1448         /* If delegation is enabled we'll pass ownership of the cgroup
1449          * (but only in systemd's own controller hierarchy!) to the
1450          * user of the new process. */
1451         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1452                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1453                 if (err < 0) {
1454                         *error = EXIT_CGROUP;
1455                         return err;
1456                 }
1457
1458
1459                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1460                 if (err < 0) {
1461                         *error = EXIT_CGROUP;
1462                         return err;
1463                 }
1464         }
1465
1466         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1467                 char **rt;
1468
1469                 STRV_FOREACH(rt, context->runtime_directory) {
1470                         _cleanup_free_ char *p;
1471
1472                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1473                         if (!p) {
1474                                 *error = EXIT_RUNTIME_DIRECTORY;
1475                                 return -ENOMEM;
1476                         }
1477
1478                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1479                         if (err < 0) {
1480                                 *error = EXIT_RUNTIME_DIRECTORY;
1481                                 return err;
1482                         }
1483                 }
1484         }
1485
1486         if (params->apply_permissions) {
1487                 err = enforce_groups(context, username, gid);
1488                 if (err < 0) {
1489                         *error = EXIT_GROUP;
1490                         return err;
1491                 }
1492         }
1493
1494         umask(context->umask);
1495
1496 #ifdef HAVE_PAM
1497         if (params->apply_permissions && context->pam_name && username) {
1498                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1499                 if (err < 0) {
1500                         *error = EXIT_PAM;
1501                         return err;
1502                 }
1503         }
1504 #endif
1505
1506         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1507                 err = setup_netns(runtime->netns_storage_socket);
1508                 if (err < 0) {
1509                         *error = EXIT_NETWORK;
1510                         return err;
1511                 }
1512         }
1513
1514         if (!strv_isempty(context->read_write_dirs) ||
1515             !strv_isempty(context->read_only_dirs) ||
1516             !strv_isempty(context->inaccessible_dirs) ||
1517             context->mount_flags != 0 ||
1518             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1519             params->bus_endpoint_path ||
1520             context->private_devices ||
1521             context->protect_system != PROTECT_SYSTEM_NO ||
1522             context->protect_home != PROTECT_HOME_NO) {
1523
1524                 char *tmp = NULL, *var = NULL;
1525
1526                 /* The runtime struct only contains the parent
1527                  * of the private /tmp, which is
1528                  * non-accessible to world users. Inside of it
1529                  * there's a /tmp that is sticky, and that's
1530                  * the one we want to use here. */
1531
1532                 if (context->private_tmp && runtime) {
1533                         if (runtime->tmp_dir)
1534                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1535                         if (runtime->var_tmp_dir)
1536                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1537                 }
1538
1539                 err = setup_namespace(
1540                                 context->read_write_dirs,
1541                                 context->read_only_dirs,
1542                                 context->inaccessible_dirs,
1543                                 tmp,
1544                                 var,
1545                                 params->bus_endpoint_path,
1546                                 context->private_devices,
1547                                 context->protect_home,
1548                                 context->protect_system,
1549                                 context->mount_flags);
1550
1551                 if (err == -EPERM)
1552                         log_warning_unit(params->unit_id, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %s", strerror(-err));
1553                 else if (err < 0) {
1554                         *error = EXIT_NAMESPACE;
1555                         return err;
1556                 }
1557         }
1558
1559         if (params->apply_chroot) {
1560                 if (context->root_directory)
1561                         if (chroot(context->root_directory) < 0) {
1562                                 *error = EXIT_CHROOT;
1563                                 return -errno;
1564                         }
1565
1566                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1567                         *error = EXIT_CHDIR;
1568                         return -errno;
1569                 }
1570         } else {
1571                 _cleanup_free_ char *d = NULL;
1572
1573                 if (asprintf(&d, "%s/%s",
1574                              context->root_directory ? context->root_directory : "",
1575                              context->working_directory ? context->working_directory : "") < 0) {
1576                         *error = EXIT_MEMORY;
1577                         return -ENOMEM;
1578                 }
1579
1580                 if (chdir(d) < 0) {
1581                         *error = EXIT_CHDIR;
1582                         return -errno;
1583                 }
1584         }
1585
1586         /* We repeat the fd closing here, to make sure that
1587          * nothing is leaked from the PAM modules. Note that
1588          * we are more aggressive this time since socket_fd
1589          * and the netns fds we don't need anymore. The custom
1590          * endpoint fd was needed to upload the policy and can
1591          * now be closed as well. */
1592         err = close_all_fds(fds, n_fds);
1593         if (err >= 0)
1594                 err = shift_fds(fds, n_fds);
1595         if (err >= 0)
1596                 err = flags_fds(fds, n_fds, context->non_blocking);
1597         if (err < 0) {
1598                 *error = EXIT_FDS;
1599                 return err;
1600         }
1601
1602         if (params->apply_permissions) {
1603
1604                 for (i = 0; i < _RLIMIT_MAX; i++) {
1605                         if (!context->rlimit[i])
1606                                 continue;
1607
1608                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1609                                 *error = EXIT_LIMITS;
1610                                 return -errno;
1611                         }
1612                 }
1613
1614                 if (context->capability_bounding_set_drop) {
1615                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1616                         if (err < 0) {
1617                                 *error = EXIT_CAPABILITIES;
1618                                 return err;
1619                         }
1620                 }
1621
1622 #ifdef HAVE_SMACK
1623                 if (context->smack_process_label) {
1624                         err = mac_smack_apply_pid(0, context->smack_process_label);
1625                         if (err < 0) {
1626                                 *error = EXIT_SMACK_PROCESS_LABEL;
1627                                 return err;
1628                         }
1629                 }
1630 #endif
1631
1632                 if (context->user) {
1633                         err = enforce_user(context, uid);
1634                         if (err < 0) {
1635                                 *error = EXIT_USER;
1636                                 return err;
1637                         }
1638                 }
1639
1640                 /* PR_GET_SECUREBITS is not privileged, while
1641                  * PR_SET_SECUREBITS is. So to suppress
1642                  * potential EPERMs we'll try not to call
1643                  * PR_SET_SECUREBITS unless necessary. */
1644                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1645                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1646                                 *error = EXIT_SECUREBITS;
1647                                 return -errno;
1648                         }
1649
1650                 if (context->capabilities)
1651                         if (cap_set_proc(context->capabilities) < 0) {
1652                                 *error = EXIT_CAPABILITIES;
1653                                 return -errno;
1654                         }
1655
1656                 if (context->no_new_privileges)
1657                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1658                                 *error = EXIT_NO_NEW_PRIVILEGES;
1659                                 return -errno;
1660                         }
1661
1662 #ifdef HAVE_SECCOMP
1663                 if (context->address_families_whitelist ||
1664                     !set_isempty(context->address_families)) {
1665                         err = apply_address_families(context);
1666                         if (err < 0) {
1667                                 *error = EXIT_ADDRESS_FAMILIES;
1668                                 return err;
1669                         }
1670                 }
1671
1672                 if (context->syscall_whitelist ||
1673                     !set_isempty(context->syscall_filter) ||
1674                     !set_isempty(context->syscall_archs)) {
1675                         err = apply_seccomp(context);
1676                         if (err < 0) {
1677                                 *error = EXIT_SECCOMP;
1678                                 return err;
1679                         }
1680                 }
1681 #endif
1682
1683 #ifdef HAVE_SELINUX
1684                 if (mac_selinux_use()) {
1685                         if (context->selinux_context) {
1686                                 err = setexeccon(context->selinux_context);
1687                                 if (err < 0 && !context->selinux_context_ignore) {
1688                                         *error = EXIT_SELINUX_CONTEXT;
1689                                         return err;
1690                                 }
1691                         }
1692
1693                         if (params->selinux_context_net && socket_fd >= 0) {
1694                                 _cleanup_free_ char *label = NULL;
1695
1696                                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1697                                 if (err < 0) {
1698                                         *error = EXIT_SELINUX_CONTEXT;
1699                                         return err;
1700                                 }
1701
1702                                 err = setexeccon(label);
1703                                 if (err < 0) {
1704                                         *error = EXIT_SELINUX_CONTEXT;
1705                                         return err;
1706                                 }
1707                         }
1708                 }
1709 #endif
1710
1711 #ifdef HAVE_APPARMOR
1712                 if (context->apparmor_profile && mac_apparmor_use()) {
1713                         err = aa_change_onexec(context->apparmor_profile);
1714                         if (err < 0 && !context->apparmor_profile_ignore) {
1715                                 *error = EXIT_APPARMOR_PROFILE;
1716                                 return -errno;
1717                         }
1718                 }
1719 #endif
1720         }
1721
1722         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1723         if (err < 0) {
1724                 *error = EXIT_MEMORY;
1725                 return err;
1726         }
1727
1728         final_env = strv_env_merge(5,
1729                                    params->environment,
1730                                    our_env,
1731                                    context->environment,
1732                                    files_env,
1733                                    pam_env,
1734                                    NULL);
1735         if (!final_env) {
1736                 *error = EXIT_MEMORY;
1737                 return -ENOMEM;
1738         }
1739
1740         final_argv = replace_env_argv(argv, final_env);
1741         if (!final_argv) {
1742                 *error = EXIT_MEMORY;
1743                 return -ENOMEM;
1744         }
1745
1746         final_env = strv_env_clean(final_env);
1747
1748         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1749                 _cleanup_free_ char *line;
1750
1751                 line = exec_command_line(final_argv);
1752                 if (line) {
1753                         log_open();
1754                         log_struct_unit(LOG_DEBUG,
1755                                         params->unit_id,
1756                                         "EXECUTABLE=%s", command->path,
1757                                         "MESSAGE=Executing: %s", line,
1758                                         NULL);
1759                         log_close();
1760                 }
1761         }
1762         execve(command->path, final_argv, final_env);
1763         *error = EXIT_EXEC;
1764         return -errno;
1765 }
1766
1767 int exec_spawn(ExecCommand *command,
1768                const ExecContext *context,
1769                const ExecParameters *params,
1770                ExecRuntime *runtime,
1771                pid_t *ret) {
1772
1773         _cleanup_strv_free_ char **files_env = NULL;
1774         int *fds = NULL; unsigned n_fds = 0;
1775         char *line, **argv;
1776         int socket_fd;
1777         pid_t pid;
1778         int err;
1779
1780         assert(command);
1781         assert(context);
1782         assert(ret);
1783         assert(params);
1784         assert(params->fds || params->n_fds <= 0);
1785
1786         if (context->std_input == EXEC_INPUT_SOCKET ||
1787             context->std_output == EXEC_OUTPUT_SOCKET ||
1788             context->std_error == EXEC_OUTPUT_SOCKET) {
1789
1790                 if (params->n_fds != 1)
1791                         return -EINVAL;
1792
1793                 socket_fd = params->fds[0];
1794         } else {
1795                 socket_fd = -1;
1796                 fds = params->fds;
1797                 n_fds = params->n_fds;
1798         }
1799
1800         err = exec_context_load_environment(context, params->unit_id, &files_env);
1801         if (err < 0) {
1802                 log_struct_unit(LOG_ERR,
1803                            params->unit_id,
1804                            "MESSAGE=Failed to load environment files: %s", strerror(-err),
1805                            "ERRNO=%d", -err,
1806                            NULL);
1807                 return err;
1808         }
1809
1810         argv = params->argv ?: command->argv;
1811
1812         line = exec_command_line(argv);
1813         if (!line)
1814                 return log_oom();
1815
1816         log_struct_unit(LOG_DEBUG,
1817                         params->unit_id,
1818                         "EXECUTABLE=%s", command->path,
1819                         "MESSAGE=About to execute: %s", line,
1820                         NULL);
1821         free(line);
1822
1823         pid = fork();
1824         if (pid < 0)
1825                 return -errno;
1826
1827         if (pid == 0) {
1828                 int r;
1829
1830                 err = exec_child(command,
1831                                  context,
1832                                  params,
1833                                  runtime,
1834                                  argv,
1835                                  socket_fd,
1836                                  fds, n_fds,
1837                                  files_env,
1838                                  &r);
1839                 if (r != 0) {
1840                         log_open();
1841                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842                                    "EXECUTABLE=%s", command->path,
1843                                    "MESSAGE=Failed at step %s spawning %s: %s",
1844                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845                                           command->path, strerror(-err),
1846                                    "ERRNO=%d", -err,
1847                                    NULL);
1848                         log_close();
1849                 }
1850
1851                 _exit(r);
1852         }
1853
1854         log_struct_unit(LOG_DEBUG,
1855                         params->unit_id,
1856                         "MESSAGE=Forked %s as "PID_FMT,
1857                         command->path, pid,
1858                         NULL);
1859
1860         /* We add the new process to the cgroup both in the child (so
1861          * that we can be sure that no user code is ever executed
1862          * outside of the cgroup) and in the parent (so that we can be
1863          * sure that when we kill the cgroup the process will be
1864          * killed too). */
1865         if (params->cgroup_path)
1866                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1867
1868         exec_status_start(&command->exec_status, pid);
1869
1870         *ret = pid;
1871         return 0;
1872 }
1873
1874 void exec_context_init(ExecContext *c) {
1875         assert(c);
1876
1877         c->umask = 0022;
1878         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879         c->cpu_sched_policy = SCHED_OTHER;
1880         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881         c->syslog_level_prefix = true;
1882         c->ignore_sigpipe = true;
1883         c->timer_slack_nsec = NSEC_INFINITY;
1884         c->personality = 0xffffffffUL;
1885         c->runtime_directory_mode = 0755;
1886 }
1887
1888 void exec_context_done(ExecContext *c) {
1889         unsigned l;
1890
1891         assert(c);
1892
1893         strv_free(c->environment);
1894         c->environment = NULL;
1895
1896         strv_free(c->environment_files);
1897         c->environment_files = NULL;
1898
1899         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1900                 free(c->rlimit[l]);
1901                 c->rlimit[l] = NULL;
1902         }
1903
1904         free(c->working_directory);
1905         c->working_directory = NULL;
1906         free(c->root_directory);
1907         c->root_directory = NULL;
1908
1909         free(c->tty_path);
1910         c->tty_path = NULL;
1911
1912         free(c->syslog_identifier);
1913         c->syslog_identifier = NULL;
1914
1915         free(c->user);
1916         c->user = NULL;
1917
1918         free(c->group);
1919         c->group = NULL;
1920
1921         strv_free(c->supplementary_groups);
1922         c->supplementary_groups = NULL;
1923
1924         free(c->pam_name);
1925         c->pam_name = NULL;
1926
1927         if (c->capabilities) {
1928                 cap_free(c->capabilities);
1929                 c->capabilities = NULL;
1930         }
1931
1932         strv_free(c->read_only_dirs);
1933         c->read_only_dirs = NULL;
1934
1935         strv_free(c->read_write_dirs);
1936         c->read_write_dirs = NULL;
1937
1938         strv_free(c->inaccessible_dirs);
1939         c->inaccessible_dirs = NULL;
1940
1941         if (c->cpuset)
1942                 CPU_FREE(c->cpuset);
1943
1944         free(c->utmp_id);
1945         c->utmp_id = NULL;
1946
1947         free(c->selinux_context);
1948         c->selinux_context = NULL;
1949
1950         free(c->apparmor_profile);
1951         c->apparmor_profile = NULL;
1952
1953         set_free(c->syscall_filter);
1954         c->syscall_filter = NULL;
1955
1956         set_free(c->syscall_archs);
1957         c->syscall_archs = NULL;
1958
1959         set_free(c->address_families);
1960         c->address_families = NULL;
1961
1962         strv_free(c->runtime_directory);
1963         c->runtime_directory = NULL;
1964
1965         bus_endpoint_free(c->bus_endpoint);
1966         c->bus_endpoint = NULL;
1967 }
1968
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1970         char **i;
1971
1972         assert(c);
1973
1974         if (!runtime_prefix)
1975                 return 0;
1976
1977         STRV_FOREACH(i, c->runtime_directory) {
1978                 _cleanup_free_ char *p;
1979
1980                 p = strjoin(runtime_prefix, "/", *i, NULL);
1981                 if (!p)
1982                         return -ENOMEM;
1983
1984                 /* We execute this synchronously, since we need to be
1985                  * sure this is gone when we start the service
1986                  * next. */
1987                 rm_rf_dangerous(p, false, true, false);
1988         }
1989
1990         return 0;
1991 }
1992
1993 void exec_command_done(ExecCommand *c) {
1994         assert(c);
1995
1996         free(c->path);
1997         c->path = NULL;
1998
1999         strv_free(c->argv);
2000         c->argv = NULL;
2001 }
2002
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2004         unsigned i;
2005
2006         for (i = 0; i < n; i++)
2007                 exec_command_done(c+i);
2008 }
2009
2010 void exec_command_free_list(ExecCommand *c) {
2011         ExecCommand *i;
2012
2013         while ((i = c)) {
2014                 LIST_REMOVE(command, c, i);
2015                 exec_command_done(i);
2016                 free(i);
2017         }
2018 }
2019
2020 void exec_command_free_array(ExecCommand **c, unsigned n) {
2021         unsigned i;
2022
2023         for (i = 0; i < n; i++) {
2024                 exec_command_free_list(c[i]);
2025                 c[i] = NULL;
2026         }
2027 }
2028
2029 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2030         char **i, **r = NULL;
2031
2032         assert(c);
2033         assert(l);
2034
2035         STRV_FOREACH(i, c->environment_files) {
2036                 char *fn;
2037                 int k;
2038                 bool ignore = false;
2039                 char **p;
2040                 _cleanup_globfree_ glob_t pglob = {};
2041                 int count, n;
2042
2043                 fn = *i;
2044
2045                 if (fn[0] == '-') {
2046                         ignore = true;
2047                         fn ++;
2048                 }
2049
2050                 if (!path_is_absolute(fn)) {
2051                         if (ignore)
2052                                 continue;
2053
2054                         strv_free(r);
2055                         return -EINVAL;
2056                 }
2057
2058                 /* Filename supports globbing, take all matching files */
2059                 errno = 0;
2060                 if (glob(fn, 0, NULL, &pglob) != 0) {
2061                         if (ignore)
2062                                 continue;
2063
2064                         strv_free(r);
2065                         return errno ? -errno : -EINVAL;
2066                 }
2067                 count = pglob.gl_pathc;
2068                 if (count == 0) {
2069                         if (ignore)
2070                                 continue;
2071
2072                         strv_free(r);
2073                         return -EINVAL;
2074                 }
2075                 for (n = 0; n < count; n++) {
2076                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2077                         if (k < 0) {
2078                                 if (ignore)
2079                                         continue;
2080
2081                                 strv_free(r);
2082                                 return k;
2083                         }
2084                         /* Log invalid environment variables with filename */
2085                         if (p)
2086                                 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2087
2088                         if (r == NULL)
2089                                 r = p;
2090                         else {
2091                                 char **m;
2092
2093                                 m = strv_env_merge(2, r, p);
2094                                 strv_free(r);
2095                                 strv_free(p);
2096                                 if (!m)
2097                                         return -ENOMEM;
2098
2099                                 r = m;
2100                         }
2101                 }
2102         }
2103
2104         *l = r;
2105
2106         return 0;
2107 }
2108
2109 static bool tty_may_match_dev_console(const char *tty) {
2110         _cleanup_free_ char *active = NULL;
2111        char *console;
2112
2113         if (startswith(tty, "/dev/"))
2114                 tty += 5;
2115
2116         /* trivial identity? */
2117         if (streq(tty, "console"))
2118                 return true;
2119
2120         console = resolve_dev_console(&active);
2121         /* if we could not resolve, assume it may */
2122         if (!console)
2123                 return true;
2124
2125         /* "tty0" means the active VC, so it may be the same sometimes */
2126         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2127 }
2128
2129 bool exec_context_may_touch_console(ExecContext *ec) {
2130         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2131                 is_terminal_input(ec->std_input) ||
2132                 is_terminal_output(ec->std_output) ||
2133                 is_terminal_output(ec->std_error)) &&
2134                tty_may_match_dev_console(tty_path(ec));
2135 }
2136
2137 static void strv_fprintf(FILE *f, char **l) {
2138         char **g;
2139
2140         assert(f);
2141
2142         STRV_FOREACH(g, l)
2143                 fprintf(f, " %s", *g);
2144 }
2145
2146 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2147         char **e;
2148         unsigned i;
2149
2150         assert(c);
2151         assert(f);
2152
2153         prefix = strempty(prefix);
2154
2155         fprintf(f,
2156                 "%sUMask: %04o\n"
2157                 "%sWorkingDirectory: %s\n"
2158                 "%sRootDirectory: %s\n"
2159                 "%sNonBlocking: %s\n"
2160                 "%sPrivateTmp: %s\n"
2161                 "%sPrivateNetwork: %s\n"
2162                 "%sPrivateDevices: %s\n"
2163                 "%sProtectHome: %s\n"
2164                 "%sProtectSystem: %s\n"
2165                 "%sIgnoreSIGPIPE: %s\n",
2166                 prefix, c->umask,
2167                 prefix, c->working_directory ? c->working_directory : "/",
2168                 prefix, c->root_directory ? c->root_directory : "/",
2169                 prefix, yes_no(c->non_blocking),
2170                 prefix, yes_no(c->private_tmp),
2171                 prefix, yes_no(c->private_network),
2172                 prefix, yes_no(c->private_devices),
2173                 prefix, protect_home_to_string(c->protect_home),
2174                 prefix, protect_system_to_string(c->protect_system),
2175                 prefix, yes_no(c->ignore_sigpipe));
2176
2177         STRV_FOREACH(e, c->environment)
2178                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2179
2180         STRV_FOREACH(e, c->environment_files)
2181                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2182
2183         if (c->nice_set)
2184                 fprintf(f,
2185                         "%sNice: %i\n",
2186                         prefix, c->nice);
2187
2188         if (c->oom_score_adjust_set)
2189                 fprintf(f,
2190                         "%sOOMScoreAdjust: %i\n",
2191                         prefix, c->oom_score_adjust);
2192
2193         for (i = 0; i < RLIM_NLIMITS; i++)
2194                 if (c->rlimit[i])
2195                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2196                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2197
2198         if (c->ioprio_set) {
2199                 _cleanup_free_ char *class_str = NULL;
2200
2201                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2202                 fprintf(f,
2203                         "%sIOSchedulingClass: %s\n"
2204                         "%sIOPriority: %i\n",
2205                         prefix, strna(class_str),
2206                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2207         }
2208
2209         if (c->cpu_sched_set) {
2210                 _cleanup_free_ char *policy_str = NULL;
2211
2212                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2213                 fprintf(f,
2214                         "%sCPUSchedulingPolicy: %s\n"
2215                         "%sCPUSchedulingPriority: %i\n"
2216                         "%sCPUSchedulingResetOnFork: %s\n",
2217                         prefix, strna(policy_str),
2218                         prefix, c->cpu_sched_priority,
2219                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2220         }
2221
2222         if (c->cpuset) {
2223                 fprintf(f, "%sCPUAffinity:", prefix);
2224                 for (i = 0; i < c->cpuset_ncpus; i++)
2225                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2226                                 fprintf(f, " %u", i);
2227                 fputs("\n", f);
2228         }
2229
2230         if (c->timer_slack_nsec != NSEC_INFINITY)
2231                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2232
2233         fprintf(f,
2234                 "%sStandardInput: %s\n"
2235                 "%sStandardOutput: %s\n"
2236                 "%sStandardError: %s\n",
2237                 prefix, exec_input_to_string(c->std_input),
2238                 prefix, exec_output_to_string(c->std_output),
2239                 prefix, exec_output_to_string(c->std_error));
2240
2241         if (c->tty_path)
2242                 fprintf(f,
2243                         "%sTTYPath: %s\n"
2244                         "%sTTYReset: %s\n"
2245                         "%sTTYVHangup: %s\n"
2246                         "%sTTYVTDisallocate: %s\n",
2247                         prefix, c->tty_path,
2248                         prefix, yes_no(c->tty_reset),
2249                         prefix, yes_no(c->tty_vhangup),
2250                         prefix, yes_no(c->tty_vt_disallocate));
2251
2252         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2253             c->std_output == EXEC_OUTPUT_KMSG ||
2254             c->std_output == EXEC_OUTPUT_JOURNAL ||
2255             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2256             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2257             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2258             c->std_error == EXEC_OUTPUT_SYSLOG ||
2259             c->std_error == EXEC_OUTPUT_KMSG ||
2260             c->std_error == EXEC_OUTPUT_JOURNAL ||
2261             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2262             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2263             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2264
2265                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2266
2267                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2268                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2269
2270                 fprintf(f,
2271                         "%sSyslogFacility: %s\n"
2272                         "%sSyslogLevel: %s\n",
2273                         prefix, strna(fac_str),
2274                         prefix, strna(lvl_str));
2275         }
2276
2277         if (c->capabilities) {
2278                 _cleanup_cap_free_charp_ char *t;
2279
2280                 t = cap_to_text(c->capabilities, NULL);
2281                 if (t)
2282                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2283         }
2284
2285         if (c->secure_bits)
2286                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2287                         prefix,
2288                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2289                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2290                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2291                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2292                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2293                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2294
2295         if (c->capability_bounding_set_drop) {
2296                 unsigned long l;
2297                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2298
2299                 for (l = 0; l <= cap_last_cap(); l++)
2300                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2301                                 _cleanup_cap_free_charp_ char *t;
2302
2303                                 t = cap_to_name(l);
2304                                 if (t)
2305                                         fprintf(f, " %s", t);
2306                         }
2307
2308                 fputs("\n", f);
2309         }
2310
2311         if (c->user)
2312                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2313         if (c->group)
2314                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2315
2316         if (strv_length(c->supplementary_groups) > 0) {
2317                 fprintf(f, "%sSupplementaryGroups:", prefix);
2318                 strv_fprintf(f, c->supplementary_groups);
2319                 fputs("\n", f);
2320         }
2321
2322         if (c->pam_name)
2323                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2324
2325         if (strv_length(c->read_write_dirs) > 0) {
2326                 fprintf(f, "%sReadWriteDirs:", prefix);
2327                 strv_fprintf(f, c->read_write_dirs);
2328                 fputs("\n", f);
2329         }
2330
2331         if (strv_length(c->read_only_dirs) > 0) {
2332                 fprintf(f, "%sReadOnlyDirs:", prefix);
2333                 strv_fprintf(f, c->read_only_dirs);
2334                 fputs("\n", f);
2335         }
2336
2337         if (strv_length(c->inaccessible_dirs) > 0) {
2338                 fprintf(f, "%sInaccessibleDirs:", prefix);
2339                 strv_fprintf(f, c->inaccessible_dirs);
2340                 fputs("\n", f);
2341         }
2342
2343         if (c->utmp_id)
2344                 fprintf(f,
2345                         "%sUtmpIdentifier: %s\n",
2346                         prefix, c->utmp_id);
2347
2348         if (c->selinux_context)
2349                 fprintf(f,
2350                         "%sSELinuxContext: %s%s\n",
2351                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2352
2353         if (c->personality != 0xffffffffUL)
2354                 fprintf(f,
2355                         "%sPersonality: %s\n",
2356                         prefix, strna(personality_to_string(c->personality)));
2357
2358         if (c->syscall_filter) {
2359 #ifdef HAVE_SECCOMP
2360                 Iterator j;
2361                 void *id;
2362                 bool first = true;
2363 #endif
2364
2365                 fprintf(f,
2366                         "%sSystemCallFilter: ",
2367                         prefix);
2368
2369                 if (!c->syscall_whitelist)
2370                         fputc('~', f);
2371
2372 #ifdef HAVE_SECCOMP
2373                 SET_FOREACH(id, c->syscall_filter, j) {
2374                         _cleanup_free_ char *name = NULL;
2375
2376                         if (first)
2377                                 first = false;
2378                         else
2379                                 fputc(' ', f);
2380
2381                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2382                         fputs(strna(name), f);
2383                 }
2384 #endif
2385
2386                 fputc('\n', f);
2387         }
2388
2389         if (c->syscall_archs) {
2390 #ifdef HAVE_SECCOMP
2391                 Iterator j;
2392                 void *id;
2393 #endif
2394
2395                 fprintf(f,
2396                         "%sSystemCallArchitectures:",
2397                         prefix);
2398
2399 #ifdef HAVE_SECCOMP
2400                 SET_FOREACH(id, c->syscall_archs, j)
2401                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2402 #endif
2403                 fputc('\n', f);
2404         }
2405
2406         if (c->syscall_errno != 0)
2407                 fprintf(f,
2408                         "%sSystemCallErrorNumber: %s\n",
2409                         prefix, strna(errno_to_name(c->syscall_errno)));
2410
2411         if (c->apparmor_profile)
2412                 fprintf(f,
2413                         "%sAppArmorProfile: %s%s\n",
2414                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2415 }
2416
2417 bool exec_context_maintains_privileges(ExecContext *c) {
2418         assert(c);
2419
2420         /* Returns true if the process forked off would run run under
2421          * an unchanged UID or as root. */
2422
2423         if (!c->user)
2424                 return true;
2425
2426         if (streq(c->user, "root") || streq(c->user, "0"))
2427                 return true;
2428
2429         return false;
2430 }
2431
2432 void exec_status_start(ExecStatus *s, pid_t pid) {
2433         assert(s);
2434
2435         zero(*s);
2436         s->pid = pid;
2437         dual_timestamp_get(&s->start_timestamp);
2438 }
2439
2440 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2441         assert(s);
2442
2443         if (s->pid && s->pid != pid)
2444                 zero(*s);
2445
2446         s->pid = pid;
2447         dual_timestamp_get(&s->exit_timestamp);
2448
2449         s->code = code;
2450         s->status = status;
2451
2452         if (context) {
2453                 if (context->utmp_id)
2454                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2455
2456                 exec_context_tty_reset(context);
2457         }
2458 }
2459
2460 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2461         char buf[FORMAT_TIMESTAMP_MAX];
2462
2463         assert(s);
2464         assert(f);
2465
2466         if (s->pid <= 0)
2467                 return;
2468
2469         prefix = strempty(prefix);
2470
2471         fprintf(f,
2472                 "%sPID: "PID_FMT"\n",
2473                 prefix, s->pid);
2474
2475         if (s->start_timestamp.realtime > 0)
2476                 fprintf(f,
2477                         "%sStart Timestamp: %s\n",
2478                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2479
2480         if (s->exit_timestamp.realtime > 0)
2481                 fprintf(f,
2482                         "%sExit Timestamp: %s\n"
2483                         "%sExit Code: %s\n"
2484                         "%sExit Status: %i\n",
2485                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2486                         prefix, sigchld_code_to_string(s->code),
2487                         prefix, s->status);
2488 }
2489
2490 char *exec_command_line(char **argv) {
2491         size_t k;
2492         char *n, *p, **a;
2493         bool first = true;
2494
2495         assert(argv);
2496
2497         k = 1;
2498         STRV_FOREACH(a, argv)
2499                 k += strlen(*a)+3;
2500
2501         if (!(n = new(char, k)))
2502                 return NULL;
2503
2504         p = n;
2505         STRV_FOREACH(a, argv) {
2506
2507                 if (!first)
2508                         *(p++) = ' ';
2509                 else
2510                         first = false;
2511
2512                 if (strpbrk(*a, WHITESPACE)) {
2513                         *(p++) = '\'';
2514                         p = stpcpy(p, *a);
2515                         *(p++) = '\'';
2516                 } else
2517                         p = stpcpy(p, *a);
2518
2519         }
2520
2521         *p = 0;
2522
2523         /* FIXME: this doesn't really handle arguments that have
2524          * spaces and ticks in them */
2525
2526         return n;
2527 }
2528
2529 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2530         _cleanup_free_ char *cmd = NULL;
2531         const char *prefix2;
2532
2533         assert(c);
2534         assert(f);
2535
2536         prefix = strempty(prefix);
2537         prefix2 = strappenda(prefix, "\t");
2538
2539         cmd = exec_command_line(c->argv);
2540         fprintf(f,
2541                 "%sCommand Line: %s\n",
2542                 prefix, cmd ? cmd : strerror(ENOMEM));
2543
2544         exec_status_dump(&c->exec_status, f, prefix2);
2545 }
2546
2547 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2548         assert(f);
2549
2550         prefix = strempty(prefix);
2551
2552         LIST_FOREACH(command, c, c)
2553                 exec_command_dump(c, f, prefix);
2554 }
2555
2556 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2557         ExecCommand *end;
2558
2559         assert(l);
2560         assert(e);
2561
2562         if (*l) {
2563                 /* It's kind of important, that we keep the order here */
2564                 LIST_FIND_TAIL(command, *l, end);
2565                 LIST_INSERT_AFTER(command, *l, end, e);
2566         } else
2567               *l = e;
2568 }
2569
2570 int exec_command_set(ExecCommand *c, const char *path, ...) {
2571         va_list ap;
2572         char **l, *p;
2573
2574         assert(c);
2575         assert(path);
2576
2577         va_start(ap, path);
2578         l = strv_new_ap(path, ap);
2579         va_end(ap);
2580
2581         if (!l)
2582                 return -ENOMEM;
2583
2584         p = strdup(path);
2585         if (!p) {
2586                 strv_free(l);
2587                 return -ENOMEM;
2588         }
2589
2590         free(c->path);
2591         c->path = p;
2592
2593         strv_free(c->argv);
2594         c->argv = l;
2595
2596         return 0;
2597 }
2598
2599 int exec_command_append(ExecCommand *c, const char *path, ...) {
2600         _cleanup_strv_free_ char **l = NULL;
2601         va_list ap;
2602         int r;
2603
2604         assert(c);
2605         assert(path);
2606
2607         va_start(ap, path);
2608         l = strv_new_ap(path, ap);
2609         va_end(ap);
2610
2611         if (!l)
2612                 return -ENOMEM;
2613
2614         r = strv_extend_strv(&c->argv, l);
2615         if (r < 0)
2616                 return r;
2617
2618         return 0;
2619 }
2620
2621
2622 static int exec_runtime_allocate(ExecRuntime **rt) {
2623
2624         if (*rt)
2625                 return 0;
2626
2627         *rt = new0(ExecRuntime, 1);
2628         if (!*rt)
2629                 return -ENOMEM;
2630
2631         (*rt)->n_ref = 1;
2632         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2633
2634         return 0;
2635 }
2636
2637 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2638         int r;
2639
2640         assert(rt);
2641         assert(c);
2642         assert(id);
2643
2644         if (*rt)
2645                 return 1;
2646
2647         if (!c->private_network && !c->private_tmp)
2648                 return 0;
2649
2650         r = exec_runtime_allocate(rt);
2651         if (r < 0)
2652                 return r;
2653
2654         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2655                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2656                         return -errno;
2657         }
2658
2659         if (c->private_tmp && !(*rt)->tmp_dir) {
2660                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2661                 if (r < 0)
2662                         return r;
2663         }
2664
2665         return 1;
2666 }
2667
2668 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2669         assert(r);
2670         assert(r->n_ref > 0);
2671
2672         r->n_ref++;
2673         return r;
2674 }
2675
2676 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2677
2678         if (!r)
2679                 return NULL;
2680
2681         assert(r->n_ref > 0);
2682
2683         r->n_ref--;
2684         if (r->n_ref <= 0) {
2685                 free(r->tmp_dir);
2686                 free(r->var_tmp_dir);
2687                 safe_close_pair(r->netns_storage_socket);
2688                 free(r);
2689         }
2690
2691         return NULL;
2692 }
2693
2694 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2695         assert(u);
2696         assert(f);
2697         assert(fds);
2698
2699         if (!rt)
2700                 return 0;
2701
2702         if (rt->tmp_dir)
2703                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2704
2705         if (rt->var_tmp_dir)
2706                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2707
2708         if (rt->netns_storage_socket[0] >= 0) {
2709                 int copy;
2710
2711                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2712                 if (copy < 0)
2713                         return copy;
2714
2715                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2716         }
2717
2718         if (rt->netns_storage_socket[1] >= 0) {
2719                 int copy;
2720
2721                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2722                 if (copy < 0)
2723                         return copy;
2724
2725                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2726         }
2727
2728         return 0;
2729 }
2730
2731 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2732         int r;
2733
2734         assert(rt);
2735         assert(key);
2736         assert(value);
2737
2738         if (streq(key, "tmp-dir")) {
2739                 char *copy;
2740
2741                 r = exec_runtime_allocate(rt);
2742                 if (r < 0)
2743                         return r;
2744
2745                 copy = strdup(value);
2746                 if (!copy)
2747                         return log_oom();
2748
2749                 free((*rt)->tmp_dir);
2750                 (*rt)->tmp_dir = copy;
2751
2752         } else if (streq(key, "var-tmp-dir")) {
2753                 char *copy;
2754
2755                 r = exec_runtime_allocate(rt);
2756                 if (r < 0)
2757                         return r;
2758
2759                 copy = strdup(value);
2760                 if (!copy)
2761                         return log_oom();
2762
2763                 free((*rt)->var_tmp_dir);
2764                 (*rt)->var_tmp_dir = copy;
2765
2766         } else if (streq(key, "netns-socket-0")) {
2767                 int fd;
2768
2769                 r = exec_runtime_allocate(rt);
2770                 if (r < 0)
2771                         return r;
2772
2773                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2774                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2775                 else {
2776                         safe_close((*rt)->netns_storage_socket[0]);
2777                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2778                 }
2779         } else if (streq(key, "netns-socket-1")) {
2780                 int fd;
2781
2782                 r = exec_runtime_allocate(rt);
2783                 if (r < 0)
2784                         return r;
2785
2786                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2787                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2788                 else {
2789                         safe_close((*rt)->netns_storage_socket[1]);
2790                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2791                 }
2792         } else
2793                 return 0;
2794
2795         return 1;
2796 }
2797
2798 static void *remove_tmpdir_thread(void *p) {
2799         _cleanup_free_ char *path = p;
2800
2801         rm_rf_dangerous(path, false, true, false);
2802         return NULL;
2803 }
2804
2805 void exec_runtime_destroy(ExecRuntime *rt) {
2806         int r;
2807
2808         if (!rt)
2809                 return;
2810
2811         /* If there are multiple users of this, let's leave the stuff around */
2812         if (rt->n_ref > 1)
2813                 return;
2814
2815         if (rt->tmp_dir) {
2816                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2817
2818                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2819                 if (r < 0) {
2820                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2821                         free(rt->tmp_dir);
2822                 }
2823
2824                 rt->tmp_dir = NULL;
2825         }
2826
2827         if (rt->var_tmp_dir) {
2828                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2829
2830                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2831                 if (r < 0) {
2832                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2833                         free(rt->var_tmp_dir);
2834                 }
2835
2836                 rt->var_tmp_dir = NULL;
2837         }
2838
2839         safe_close_pair(rt->netns_storage_socket);
2840 }
2841
2842 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2843         [EXEC_INPUT_NULL] = "null",
2844         [EXEC_INPUT_TTY] = "tty",
2845         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2846         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2847         [EXEC_INPUT_SOCKET] = "socket"
2848 };
2849
2850 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2851
2852 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2853         [EXEC_OUTPUT_INHERIT] = "inherit",
2854         [EXEC_OUTPUT_NULL] = "null",
2855         [EXEC_OUTPUT_TTY] = "tty",
2856         [EXEC_OUTPUT_SYSLOG] = "syslog",
2857         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2858         [EXEC_OUTPUT_KMSG] = "kmsg",
2859         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2860         [EXEC_OUTPUT_JOURNAL] = "journal",
2861         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2862         [EXEC_OUTPUT_SOCKET] = "socket"
2863 };
2864
2865 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);