chiark / gitweb /
b7ac4c7b2bae3fb20412bf789d51b06c6588aac0
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
88 #include "label.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
/* Moves the n_fds descriptors in fds[] so that fds[k] ends up being
 * descriptor number k+3. The array is modified in place. Returns 0 on
 * success or a negative errno if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= wanted. If
                         * the slot was still occupied we got something else,
                         * so remember the first such index and make another
                         * pass starting there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
145
/* Applies the requested O_NONBLOCK state to each of the n_fds descriptors
 * in fds[] and clears FD_CLOEXEC on all of them. Returns 0 on success, or
 * the first negative value reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        const int *cur, *end;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (cur = fds, end = fds + n_fds; cur < end; cur++) {
                int r;

                r = fd_nonblock(*cur, nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: these descriptors are
                 * meant to be handed over to our children. */
                r = fd_cloexec(*cur, false);
                if (r < 0)
                        return r;
        }

        return 0;
}
172
173 _pure_ static const char *tty_path(const ExecContext *context) {
174         assert(context);
175
176         if (context->tty_path)
177                 return context->tty_path;
178
179         return "/dev/console";
180 }
181
182 static void exec_context_tty_reset(const ExecContext *context) {
183         assert(context);
184
185         if (context->tty_vhangup)
186                 terminal_vhangup(tty_path(context));
187
188         if (context->tty_reset)
189                 reset_terminal(tty_path(context));
190
191         if (context->tty_vt_disallocate && context->tty_path)
192                 vt_disallocate(context->tty_path);
193 }
194
195 static bool is_terminal_output(ExecOutput o) {
196         return
197                 o == EXEC_OUTPUT_TTY ||
198                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
201 }
202
/* Opens /dev/null with the given flags (plus O_NOCTTY) and installs it as
 * descriptor nfd. Returns nfd on success, a negative errno otherwise. */
static int open_null_as(int flags, int nfd) {
        int null_fd, r = nfd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* open() may already have given us the wanted descriptor */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                r = -errno;

        safe_close(null_fd);
        return r;
}
220
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
222         int fd, r;
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227
228         assert(context);
229         assert(output < _EXEC_OUTPUT_MAX);
230         assert(ident);
231         assert(nfd >= 0);
232
233         fd = socket(AF_UNIX, SOCK_STREAM, 0);
234         if (fd < 0)
235                 return -errno;
236
237         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
238         if (r < 0) {
239                 safe_close(fd);
240                 return -errno;
241         }
242
243         if (shutdown(fd, SHUT_RD) < 0) {
244                 safe_close(fd);
245                 return -errno;
246         }
247
248         fd_inc_sndbuf(fd, SNDBUF_SIZE);
249
250         dprintf(fd,
251                 "%s\n"
252                 "%s\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n"
257                 "%i\n",
258                 context->syslog_identifier ? context->syslog_identifier : ident,
259                 unit_id,
260                 context->syslog_priority,
261                 !!context->syslog_level_prefix,
262                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264                 is_terminal_output(output));
265
266         if (fd != nfd) {
267                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268                 safe_close(fd);
269         } else
270                 r = nfd;
271
272         return r;
273 }
/* Opens the terminal at path via open_terminal() (adding O_NOCTTY to
 * mode) and installs it as descriptor nfd. Returns nfd on success, a
 * negative error otherwise. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, r = nfd;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                r = -errno;

        safe_close(term_fd);
        return r;
}
291
292 static bool is_terminal_input(ExecInput i) {
293         return
294                 i == EXEC_INPUT_TTY ||
295                 i == EXEC_INPUT_TTY_FORCE ||
296                 i == EXEC_INPUT_TTY_FAIL;
297 }
298
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
300
301         if (is_terminal_input(std_input) && !apply_tty_stdin)
302                 return EXEC_INPUT_NULL;
303
304         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305                 return EXEC_INPUT_NULL;
306
307         return std_input;
308 }
309
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
311
312         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313                 return EXEC_OUTPUT_INHERIT;
314
315         return std_output;
316 }
317
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
319         ExecInput i;
320
321         assert(context);
322
323         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324
325         switch (i) {
326
327         case EXEC_INPUT_NULL:
328                 return open_null_as(O_RDONLY, STDIN_FILENO);
329
330         case EXEC_INPUT_TTY:
331         case EXEC_INPUT_TTY_FORCE:
332         case EXEC_INPUT_TTY_FAIL: {
333                 int fd, r;
334
335                 fd = acquire_terminal(tty_path(context),
336                                       i == EXEC_INPUT_TTY_FAIL,
337                                       i == EXEC_INPUT_TTY_FORCE,
338                                       false,
339                                       USEC_INFINITY);
340                 if (fd < 0)
341                         return fd;
342
343                 if (fd != STDIN_FILENO) {
344                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
345                         safe_close(fd);
346                 } else
347                         r = STDIN_FILENO;
348
349                 return r;
350         }
351
352         case EXEC_INPUT_SOCKET:
353                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
354
355         default:
356                 assert_not_reached("Unknown input type");
357         }
358 }
359
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
361         ExecOutput o;
362         ExecInput i;
363         int r;
364
365         assert(context);
366         assert(ident);
367
368         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369         o = fixup_output(context->std_output, socket_fd);
370
371         if (fileno == STDERR_FILENO) {
372                 ExecOutput e;
373                 e = fixup_output(context->std_error, socket_fd);
374
375                 /* This expects the input and output are already set up */
376
377                 /* Don't change the stderr file descriptor if we inherit all
378                  * the way and are not on a tty */
379                 if (e == EXEC_OUTPUT_INHERIT &&
380                     o == EXEC_OUTPUT_INHERIT &&
381                     i == EXEC_INPUT_NULL &&
382                     !is_terminal_input(context->std_input) &&
383                     getppid () != 1)
384                         return fileno;
385
386                 /* Duplicate from stdout if possible */
387                 if (e == o || e == EXEC_OUTPUT_INHERIT)
388                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389
390                 o = e;
391
392         } else if (o == EXEC_OUTPUT_INHERIT) {
393                 /* If input got downgraded, inherit the original value */
394                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
396
397                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398                 if (i != EXEC_INPUT_NULL)
399                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
400
401                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402                 if (getppid() != 1)
403                         return fileno;
404
405                 /* We need to open /dev/null here anew, to get the right access mode. */
406                 return open_null_as(O_WRONLY, fileno);
407         }
408
409         switch (o) {
410
411         case EXEC_OUTPUT_NULL:
412                 return open_null_as(O_WRONLY, fileno);
413
414         case EXEC_OUTPUT_TTY:
415                 if (is_terminal_input(i))
416                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
417
418                 /* We don't reset the terminal if this is just about output */
419                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
420
421         case EXEC_OUTPUT_SYSLOG:
422         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423         case EXEC_OUTPUT_KMSG:
424         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425         case EXEC_OUTPUT_JOURNAL:
426         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427                 r = connect_logger_as(context, o, ident, unit_id, fileno);
428                 if (r < 0) {
429                         log_unit_struct(unit_id,
430                                         LOG_CRIT,
431                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
432                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
433                                                     unit_id, strerror(-r)),
434                                         LOG_ERRNO(-r),
435                                         NULL);
436                         r = open_null_as(O_WRONLY, fileno);
437                 }
438                 return r;
439
440         case EXEC_OUTPUT_SOCKET:
441                 assert(socket_fd >= 0);
442                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
443
444         default:
445                 assert_not_reached("Unknown error type");
446         }
447 }
448
449 static int chown_terminal(int fd, uid_t uid) {
450         struct stat st;
451
452         assert(fd >= 0);
453
454         /* This might fail. What matters are the results. */
455         (void) fchown(fd, uid, -1);
456         (void) fchmod(fd, TTY_MODE);
457
458         if (fstat(fd, &st) < 0)
459                 return -errno;
460
461         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
462                 return -EPERM;
463
464         return 0;
465 }
466
467 static int setup_confirm_stdio(int *_saved_stdin,
468                                int *_saved_stdout) {
469         int fd = -1, saved_stdin, saved_stdout = -1, r;
470
471         assert(_saved_stdin);
472         assert(_saved_stdout);
473
474         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
475         if (saved_stdin < 0)
476                 return -errno;
477
478         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
479         if (saved_stdout < 0) {
480                 r = errno;
481                 goto fail;
482         }
483
484         fd = acquire_terminal(
485                         "/dev/console",
486                         false,
487                         false,
488                         false,
489                         DEFAULT_CONFIRM_USEC);
490         if (fd < 0) {
491                 r = fd;
492                 goto fail;
493         }
494
495         r = chown_terminal(fd, getuid());
496         if (r < 0)
497                 goto fail;
498
499         if (dup2(fd, STDIN_FILENO) < 0) {
500                 r = -errno;
501                 goto fail;
502         }
503
504         if (dup2(fd, STDOUT_FILENO) < 0) {
505                 r = -errno;
506                 goto fail;
507         }
508
509         if (fd >= 2)
510                 safe_close(fd);
511
512         *_saved_stdin = saved_stdin;
513         *_saved_stdout = saved_stdout;
514
515         return 0;
516
517 fail:
518         safe_close(saved_stdout);
519         safe_close(saved_stdin);
520         safe_close(fd);
521
522         return r;
523 }
524
525 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
526         _cleanup_close_ int fd = -1;
527         va_list ap;
528
529         assert(format);
530
531         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
532         if (fd < 0)
533                 return fd;
534
535         va_start(ap, format);
536         vdprintf(fd, format, ap);
537         va_end(ap);
538
539         return 0;
540 }
541
/* Undoes setup_confirm_stdio(): releases the terminal, puts the saved
 * descriptors back onto stdin/stdout where they are valid, and closes the
 * saved copies. Returns 0 on success, or the negative errno of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
565
566 static int ask_for_confirmation(char *response, char **argv) {
567         int saved_stdout = -1, saved_stdin = -1, r;
568         _cleanup_free_ char *line = NULL;
569
570         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
571         if (r < 0)
572                 return r;
573
574         line = exec_command_line(argv);
575         if (!line)
576                 return -ENOMEM;
577
578         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
579
580         restore_confirm_stdio(&saved_stdin, &saved_stdout);
581
582         return r;
583 }
584
585 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
586         bool keep_groups = false;
587         int r;
588
589         assert(context);
590
591         /* Lookup and set GID and supplementary group list. Here too
592          * we avoid NSS lookups for gid=0. */
593
594         if (context->group || username) {
595
596                 if (context->group) {
597                         const char *g = context->group;
598
599                         if ((r = get_group_creds(&g, &gid)) < 0)
600                                 return r;
601                 }
602
603                 /* First step, initialize groups from /etc/groups */
604                 if (username && gid != 0) {
605                         if (initgroups(username, gid) < 0)
606                                 return -errno;
607
608                         keep_groups = true;
609                 }
610
611                 /* Second step, set our gids */
612                 if (setresgid(gid, gid, gid) < 0)
613                         return -errno;
614         }
615
616         if (context->supplementary_groups) {
617                 int ngroups_max, k;
618                 gid_t *gids;
619                 char **i;
620
621                 /* Final step, initialize any manually set supplementary groups */
622                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
623
624                 if (!(gids = new(gid_t, ngroups_max)))
625                         return -ENOMEM;
626
627                 if (keep_groups) {
628                         if ((k = getgroups(ngroups_max, gids)) < 0) {
629                                 free(gids);
630                                 return -errno;
631                         }
632                 } else
633                         k = 0;
634
635                 STRV_FOREACH(i, context->supplementary_groups) {
636                         const char *g;
637
638                         if (k >= ngroups_max) {
639                                 free(gids);
640                                 return -E2BIG;
641                         }
642
643                         g = *i;
644                         r = get_group_creds(&g, gids+k);
645                         if (r < 0) {
646                                 free(gids);
647                                 return r;
648                         }
649
650                         k++;
651                 }
652
653                 if (setgroups(k, gids) < 0) {
654                         free(gids);
655                         return -errno;
656                 }
657
658                 free(gids);
659         }
660
661         return 0;
662 }
663
664 static int enforce_user(const ExecContext *context, uid_t uid) {
665         assert(context);
666
667         /* Sets (but doesn't lookup) the uid and make sure we keep the
668          * capabilities while doing so. */
669
670         if (context->capabilities) {
671                 _cleanup_cap_free_ cap_t d = NULL;
672                 static const cap_value_t bits[] = {
673                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
674                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
675                 };
676
677                 /* First step: If we need to keep capabilities but
678                  * drop privileges we need to make sure we keep our
679                  * caps, while we drop privileges. */
680                 if (uid != 0) {
681                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
682
683                         if (prctl(PR_GET_SECUREBITS) != sb)
684                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685                                         return -errno;
686                 }
687
688                 /* Second step: set the capabilities. This will reduce
689                  * the capabilities to the minimum we need. */
690
691                 d = cap_dup(context->capabilities);
692                 if (!d)
693                         return -errno;
694
695                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
696                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
697                         return -errno;
698
699                 if (cap_set_proc(d) < 0)
700                         return -errno;
701         }
702
703         /* Third step: actually set the uids */
704         if (setresuid(uid, uid, uid) < 0)
705                 return -errno;
706
707         /* At this point we should have all necessary capabilities but
708            are otherwise a normal user. However, the caps might got
709            corrupted due to the setresuid() so we need clean them up
710            later. This is done outside of this call. */
711
712         return 0;
713 }
714
715 #ifdef HAVE_PAM
716
717 static int null_conv(
718                 int num_msg,
719                 const struct pam_message **msg,
720                 struct pam_response **resp,
721                 void *appdata_ptr) {
722
723         /* We don't support conversations */
724
725         return PAM_CONV_ERR;
726 }
727
728 static int setup_pam(
729                 const char *name,
730                 const char *user,
731                 uid_t uid,
732                 const char *tty,
733                 char ***pam_env,
734                 int fds[], unsigned n_fds) {
735
736         static const struct pam_conv conv = {
737                 .conv = null_conv,
738                 .appdata_ptr = NULL
739         };
740
741         pam_handle_t *handle = NULL;
742         sigset_t ss, old_ss;
743         int pam_code = PAM_SUCCESS;
744         int err;
745         char **e = NULL;
746         bool close_session = false;
747         pid_t pam_pid = 0, parent_pid;
748         int flags = 0;
749
750         assert(name);
751         assert(user);
752         assert(pam_env);
753
754         /* We set up PAM in the parent process, then fork. The child
755          * will then stay around until killed via PR_GET_PDEATHSIG or
756          * systemd via the cgroup logic. It will then remove the PAM
757          * session again. The parent process will exec() the actual
758          * daemon. We do things this way to ensure that the main PID
759          * of the daemon is the one we initially fork()ed. */
760
761         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
762                 flags |= PAM_SILENT;
763
764         pam_code = pam_start(name, user, &conv, &handle);
765         if (pam_code != PAM_SUCCESS) {
766                 handle = NULL;
767                 goto fail;
768         }
769
770         if (tty) {
771                 pam_code = pam_set_item(handle, PAM_TTY, tty);
772                 if (pam_code != PAM_SUCCESS)
773                         goto fail;
774         }
775
776         pam_code = pam_acct_mgmt(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         pam_code = pam_open_session(handle, flags);
781         if (pam_code != PAM_SUCCESS)
782                 goto fail;
783
784         close_session = true;
785
786         e = pam_getenvlist(handle);
787         if (!e) {
788                 pam_code = PAM_BUF_ERR;
789                 goto fail;
790         }
791
792         /* Block SIGTERM, so that we know that it won't get lost in
793          * the child */
794         if (sigemptyset(&ss) < 0 ||
795             sigaddset(&ss, SIGTERM) < 0 ||
796             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
797                 goto fail;
798
799         parent_pid = getpid();
800
801         pam_pid = fork();
802         if (pam_pid < 0)
803                 goto fail;
804
805         if (pam_pid == 0) {
806                 int sig;
807                 int r = EXIT_PAM;
808
809                 /* The child's job is to reset the PAM session on
810                  * termination */
811
812                 /* This string must fit in 10 chars (i.e. the length
813                  * of "/sbin/init"), to look pretty in /bin/ps */
814                 rename_process("(sd-pam)");
815
816                 /* Make sure we don't keep open the passed fds in this
817                 child. We assume that otherwise only those fds are
818                 open here that have been opened by PAM. */
819                 close_many(fds, n_fds);
820
821                 /* Drop privileges - we don't need any to pam_close_session
822                  * and this will make PR_SET_PDEATHSIG work in most cases.
823                  * If this fails, ignore the error - but expect sd-pam threads
824                  * to fail to exit normally */
825                 if (setresuid(uid, uid, uid) < 0)
826                         log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
827
828                 /* Wait until our parent died. This will only work if
829                  * the above setresuid() succeeds, otherwise the kernel
830                  * will not allow unprivileged parents kill their privileged
831                  * children this way. We rely on the control groups kill logic
832                  * to do the rest for us. */
833                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
834                         goto child_finish;
835
836                 /* Check if our parent process might already have
837                  * died? */
838                 if (getppid() == parent_pid) {
839                         for (;;) {
840                                 if (sigwait(&ss, &sig) < 0) {
841                                         if (errno == EINTR)
842                                                 continue;
843
844                                         goto child_finish;
845                                 }
846
847                                 assert(sig == SIGTERM);
848                                 break;
849                         }
850                 }
851
852                 /* If our parent died we'll end the session */
853                 if (getppid() != parent_pid) {
854                         pam_code = pam_close_session(handle, flags);
855                         if (pam_code != PAM_SUCCESS)
856                                 goto child_finish;
857                 }
858
859                 r = 0;
860
861         child_finish:
862                 pam_end(handle, pam_code | flags);
863                 _exit(r);
864         }
865
866         /* If the child was forked off successfully it will do all the
867          * cleanups, so forget about the handle here. */
868         handle = NULL;
869
870         /* Unblock SIGTERM again in the parent */
871         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
872                 goto fail;
873
874         /* We close the log explicitly here, since the PAM modules
875          * might have opened it, but we don't want this fd around. */
876         closelog();
877
878         *pam_env = e;
879         e = NULL;
880
881         return 0;
882
883 fail:
884         if (pam_code != PAM_SUCCESS) {
885                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
886                 err = -EPERM;  /* PAM errors do not map to errno */
887         } else {
888                 log_error_errno(errno, "PAM failed: %m");
889                 err = -errno;
890         }
891
892         if (handle) {
893                 if (close_session)
894                         pam_code = pam_close_session(handle, flags);
895
896                 pam_end(handle, pam_code | flags);
897         }
898
899         strv_free(e);
900
901         closelog();
902
903         if (pam_pid > 1) {
904                 kill(pam_pid, SIGTERM);
905                 kill(pam_pid, SIGCONT);
906         }
907
908         return err;
909 }
910 #endif
911
/* Renames the process to "(<name>)", where <name> is the last path
 * component of 'path', cut down to its final 8 characters. Falls back to
 * "(...)" when the path has no usable last component. */
static void rename_process_from_path(const char *path) {
        /* '(' + at most 8 characters + ')' + NUL: the result has to fit
         * in 10 chars (i.e. the length of "/sbin/init") to look pretty in
         * /bin/ps */
        char process_name[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: the beginning is often just a common
                 * prefix such as "systemd-". */
                base += len - 8;
                len = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, base, len);
        process_name[len + 1] = ')';
        process_name[len + 2] = 0;

        rename_process(process_name);
}
942
943 #ifdef HAVE_SECCOMP
944
945 static int apply_seccomp(const ExecContext *c) {
946         uint32_t negative_action, action;
947         scmp_filter_ctx *seccomp;
948         Iterator i;
949         void *id;
950         int r;
951
952         assert(c);
953
954         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
955
956         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
957         if (!seccomp)
958                 return -ENOMEM;
959
960         if (c->syscall_archs) {
961
962                 SET_FOREACH(id, c->syscall_archs, i) {
963                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
964                         if (r == -EEXIST)
965                                 continue;
966                         if (r < 0)
967                                 goto finish;
968                 }
969
970         } else {
971                 r = seccomp_add_secondary_archs(seccomp);
972                 if (r < 0)
973                         goto finish;
974         }
975
976         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
977         SET_FOREACH(id, c->syscall_filter, i) {
978                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
979                 if (r < 0)
980                         goto finish;
981         }
982
983         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
984         if (r < 0)
985                 goto finish;
986
987         r = seccomp_load(seccomp);
988
989 finish:
990         seccomp_release(seccomp);
991         return r;
992 }
993
994 static int apply_address_families(const ExecContext *c) {
995         scmp_filter_ctx *seccomp;
996         Iterator i;
997         int r;
998
999         assert(c);
1000
1001         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1002         if (!seccomp)
1003                 return -ENOMEM;
1004
1005         r = seccomp_add_secondary_archs(seccomp);
1006         if (r < 0)
1007                 goto finish;
1008
1009         if (c->address_families_whitelist) {
1010                 int af, first = 0, last = 0;
1011                 void *afp;
1012
1013                 /* If this is a whitelist, we first block the address
1014                  * families that are out of range and then everything
1015                  * that is not in the set. First, we find the lowest
1016                  * and highest address family in the set. */
1017
1018                 SET_FOREACH(afp, c->address_families, i) {
1019                         af = PTR_TO_INT(afp);
1020
1021                         if (af <= 0 || af >= af_max())
1022                                 continue;
1023
1024                         if (first == 0 || af < first)
1025                                 first = af;
1026
1027                         if (last == 0 || af > last)
1028                                 last = af;
1029                 }
1030
1031                 assert((first == 0) == (last == 0));
1032
1033                 if (first == 0) {
1034
1035                         /* No entries in the valid range, block everything */
1036                         r = seccomp_rule_add(
1037                                         seccomp,
1038                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1039                                         SCMP_SYS(socket),
1040                                         0);
1041                         if (r < 0)
1042                                 goto finish;
1043
1044                 } else {
1045
1046                         /* Block everything below the first entry */
1047                         r = seccomp_rule_add(
1048                                         seccomp,
1049                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1050                                         SCMP_SYS(socket),
1051                                         1,
1052                                         SCMP_A0(SCMP_CMP_LT, first));
1053                         if (r < 0)
1054                                 goto finish;
1055
1056                         /* Block everything above the last entry */
1057                         r = seccomp_rule_add(
1058                                         seccomp,
1059                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1060                                         SCMP_SYS(socket),
1061                                         1,
1062                                         SCMP_A0(SCMP_CMP_GT, last));
1063                         if (r < 0)
1064                                 goto finish;
1065
1066                         /* Block everything between the first and last
1067                          * entry */
1068                         for (af = 1; af < af_max(); af++) {
1069
1070                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1071                                         continue;
1072
1073                                 r = seccomp_rule_add(
1074                                                 seccomp,
1075                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076                                                 SCMP_SYS(socket),
1077                                                 1,
1078                                                 SCMP_A0(SCMP_CMP_EQ, af));
1079                                 if (r < 0)
1080                                         goto finish;
1081                         }
1082                 }
1083
1084         } else {
1085                 void *af;
1086
1087                 /* If this is a blacklist, then generate one rule for
1088                  * each address family that are then combined in OR
1089                  * checks. */
1090
1091                 SET_FOREACH(af, c->address_families, i) {
1092
1093                         r = seccomp_rule_add(
1094                                         seccomp,
1095                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1096                                         SCMP_SYS(socket),
1097                                         1,
1098                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1099                         if (r < 0)
1100                                 goto finish;
1101                 }
1102         }
1103
1104         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1105         if (r < 0)
1106                 goto finish;
1107
1108         r = seccomp_load(seccomp);
1109
1110 finish:
1111         seccomp_release(seccomp);
1112         return r;
1113 }
1114
1115 #endif
1116
1117 static void do_idle_pipe_dance(int idle_pipe[4]) {
1118         assert(idle_pipe);
1119
1120
1121         safe_close(idle_pipe[1]);
1122         safe_close(idle_pipe[2]);
1123
1124         if (idle_pipe[0] >= 0) {
1125                 int r;
1126
1127                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1128
1129                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1130                         /* Signal systemd that we are bored and want to continue. */
1131                         write(idle_pipe[3], "x", 1);
1132
1133                         /* Wait for systemd to react to the signal above. */
1134                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1135                 }
1136
1137                 safe_close(idle_pipe[0]);
1138
1139         }
1140
1141         safe_close(idle_pipe[3]);
1142 }
1143
1144 static int build_environment(
1145                 const ExecContext *c,
1146                 unsigned n_fds,
1147                 usec_t watchdog_usec,
1148                 const char *home,
1149                 const char *username,
1150                 const char *shell,
1151                 char ***ret) {
1152
1153         _cleanup_strv_free_ char **our_env = NULL;
1154         unsigned n_env = 0;
1155         char *x;
1156
1157         assert(c);
1158         assert(ret);
1159
1160         our_env = new0(char*, 10);
1161         if (!our_env)
1162                 return -ENOMEM;
1163
1164         if (n_fds > 0) {
1165                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1166                         return -ENOMEM;
1167                 our_env[n_env++] = x;
1168
1169                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1170                         return -ENOMEM;
1171                 our_env[n_env++] = x;
1172         }
1173
1174         if (watchdog_usec > 0) {
1175                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178
1179                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1180                         return -ENOMEM;
1181                 our_env[n_env++] = x;
1182         }
1183
1184         if (home) {
1185                 x = strappend("HOME=", home);
1186                 if (!x)
1187                         return -ENOMEM;
1188                 our_env[n_env++] = x;
1189         }
1190
1191         if (username) {
1192                 x = strappend("LOGNAME=", username);
1193                 if (!x)
1194                         return -ENOMEM;
1195                 our_env[n_env++] = x;
1196
1197                 x = strappend("USER=", username);
1198                 if (!x)
1199                         return -ENOMEM;
1200                 our_env[n_env++] = x;
1201         }
1202
1203         if (shell) {
1204                 x = strappend("SHELL=", shell);
1205                 if (!x)
1206                         return -ENOMEM;
1207                 our_env[n_env++] = x;
1208         }
1209
1210         if (is_terminal_input(c->std_input) ||
1211             c->std_output == EXEC_OUTPUT_TTY ||
1212             c->std_error == EXEC_OUTPUT_TTY ||
1213             c->tty_path) {
1214
1215                 x = strdup(default_term_for_tty(tty_path(c)));
1216                 if (!x)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         our_env[n_env++] = NULL;
1222         assert(n_env <= 10);
1223
1224         *ret = our_env;
1225         our_env = NULL;
1226
1227         return 0;
1228 }
1229
1230 static int exec_child(ExecCommand *command,
1231                       const ExecContext *context,
1232                       const ExecParameters *params,
1233                       ExecRuntime *runtime,
1234                       char **argv,
1235                       int socket_fd,
1236                       int *fds, unsigned n_fds,
1237                       char **files_env,
1238                       int *error) {
1239
1240         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1241         _cleanup_free_ char *mac_selinux_context_net = NULL;
1242         const char *username = NULL, *home = NULL, *shell = NULL;
1243         unsigned n_dont_close = 0;
1244         int dont_close[n_fds + 4];
1245         uid_t uid = UID_INVALID;
1246         gid_t gid = GID_INVALID;
1247         int i, err;
1248
1249         assert(command);
1250         assert(context);
1251         assert(params);
1252         assert(error);
1253
1254         rename_process_from_path(command->path);
1255
1256         /* We reset exactly these signals, since they are the
1257          * only ones we set to SIG_IGN in the main daemon. All
1258          * others we leave untouched because we set them to
1259          * SIG_DFL or a valid handler initially, both of which
1260          * will be demoted to SIG_DFL. */
1261         default_signals(SIGNALS_CRASH_HANDLER,
1262                         SIGNALS_IGNORE, -1);
1263
1264         if (context->ignore_sigpipe)
1265                 ignore_signals(SIGPIPE, -1);
1266
1267         err = reset_signal_mask();
1268         if (err < 0) {
1269                 *error = EXIT_SIGNAL_MASK;
1270                 return err;
1271         }
1272
1273         if (params->idle_pipe)
1274                 do_idle_pipe_dance(params->idle_pipe);
1275
1276         /* Close sockets very early to make sure we don't
1277          * block init reexecution because it cannot bind its
1278          * sockets */
1279         log_forget_fds();
1280
1281         if (socket_fd >= 0)
1282                 dont_close[n_dont_close++] = socket_fd;
1283         if (n_fds > 0) {
1284                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1285                 n_dont_close += n_fds;
1286         }
1287         if (params->bus_endpoint_fd >= 0)
1288                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1289         if (runtime) {
1290                 if (runtime->netns_storage_socket[0] >= 0)
1291                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1292                 if (runtime->netns_storage_socket[1] >= 0)
1293                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1294         }
1295
1296         err = close_all_fds(dont_close, n_dont_close);
1297         if (err < 0) {
1298                 *error = EXIT_FDS;
1299                 return err;
1300         }
1301
1302         if (!context->same_pgrp)
1303                 if (setsid() < 0) {
1304                         *error = EXIT_SETSID;
1305                         return -errno;
1306                 }
1307
1308         exec_context_tty_reset(context);
1309
1310         if (params->confirm_spawn) {
1311                 char response;
1312
1313                 err = ask_for_confirmation(&response, argv);
1314                 if (err == -ETIMEDOUT)
1315                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1316                 else if (err < 0)
1317                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1318                 else if (response == 's') {
1319                         write_confirm_message("Skipping execution.\n");
1320                         *error = EXIT_CONFIRM;
1321                         return -ECANCELED;
1322                 } else if (response == 'n') {
1323                         write_confirm_message("Failing execution.\n");
1324                         *error = 0;
1325                         return 0;
1326                 }
1327         }
1328
1329         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1330          * must sure to drop O_NONBLOCK */
1331         if (socket_fd >= 0)
1332                 fd_nonblock(socket_fd, false);
1333
1334         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1335         if (err < 0) {
1336                 *error = EXIT_STDIN;
1337                 return err;
1338         }
1339
1340         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1341         if (err < 0) {
1342                 *error = EXIT_STDOUT;
1343                 return err;
1344         }
1345
1346         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1347         if (err < 0) {
1348                 *error = EXIT_STDERR;
1349                 return err;
1350         }
1351
1352         if (params->cgroup_path) {
1353                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1354                 if (err < 0) {
1355                         *error = EXIT_CGROUP;
1356                         return err;
1357                 }
1358         }
1359
1360         if (context->oom_score_adjust_set) {
1361                 char t[16];
1362
1363                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1364                 char_array_0(t);
1365
1366                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1367                         *error = EXIT_OOM_ADJUST;
1368                         return -errno;
1369                 }
1370         }
1371
1372         if (context->nice_set)
1373                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1374                         *error = EXIT_NICE;
1375                         return -errno;
1376                 }
1377
1378         if (context->cpu_sched_set) {
1379                 struct sched_param param = {
1380                         .sched_priority = context->cpu_sched_priority,
1381                 };
1382
1383                 err = sched_setscheduler(0,
1384                                          context->cpu_sched_policy |
1385                                          (context->cpu_sched_reset_on_fork ?
1386                                           SCHED_RESET_ON_FORK : 0),
1387                                          &param);
1388                 if (err < 0) {
1389                         *error = EXIT_SETSCHEDULER;
1390                         return -errno;
1391                 }
1392         }
1393
1394         if (context->cpuset)
1395                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1396                         *error = EXIT_CPUAFFINITY;
1397                         return -errno;
1398                 }
1399
1400         if (context->ioprio_set)
1401                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1402                         *error = EXIT_IOPRIO;
1403                         return -errno;
1404                 }
1405
1406         if (context->timer_slack_nsec != NSEC_INFINITY)
1407                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1408                         *error = EXIT_TIMERSLACK;
1409                         return -errno;
1410                 }
1411
1412         if (context->personality != 0xffffffffUL)
1413                 if (personality(context->personality) < 0) {
1414                         *error = EXIT_PERSONALITY;
1415                         return -errno;
1416                 }
1417
1418         if (context->utmp_id)
1419                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1420
1421         if (context->user) {
1422                 username = context->user;
1423                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1424                 if (err < 0) {
1425                         *error = EXIT_USER;
1426                         return err;
1427                 }
1428
1429                 if (is_terminal_input(context->std_input)) {
1430                         err = chown_terminal(STDIN_FILENO, uid);
1431                         if (err < 0) {
1432                                 *error = EXIT_STDIN;
1433                                 return err;
1434                         }
1435                 }
1436         }
1437
1438 #ifdef ENABLE_KDBUS
1439         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1440                 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1441
1442                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1443                 if (err < 0) {
1444                         *error = EXIT_BUS_ENDPOINT;
1445                         return err;
1446                 }
1447         }
1448 #endif
1449
1450         /* If delegation is enabled we'll pass ownership of the cgroup
1451          * (but only in systemd's own controller hierarchy!) to the
1452          * user of the new process. */
1453         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1454                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1455                 if (err < 0) {
1456                         *error = EXIT_CGROUP;
1457                         return err;
1458                 }
1459
1460
1461                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1462                 if (err < 0) {
1463                         *error = EXIT_CGROUP;
1464                         return err;
1465                 }
1466         }
1467
1468         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1469                 char **rt;
1470
1471                 STRV_FOREACH(rt, context->runtime_directory) {
1472                         _cleanup_free_ char *p;
1473
1474                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1475                         if (!p) {
1476                                 *error = EXIT_RUNTIME_DIRECTORY;
1477                                 return -ENOMEM;
1478                         }
1479
1480                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1481                         if (err < 0) {
1482                                 *error = EXIT_RUNTIME_DIRECTORY;
1483                                 return err;
1484                         }
1485                 }
1486         }
1487
1488         if (params->apply_permissions) {
1489                 err = enforce_groups(context, username, gid);
1490                 if (err < 0) {
1491                         *error = EXIT_GROUP;
1492                         return err;
1493                 }
1494         }
1495
1496         umask(context->umask);
1497
1498 #ifdef HAVE_PAM
1499         if (params->apply_permissions && context->pam_name && username) {
1500                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1501                 if (err < 0) {
1502                         *error = EXIT_PAM;
1503                         return err;
1504                 }
1505         }
1506 #endif
1507
1508         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1509                 err = setup_netns(runtime->netns_storage_socket);
1510                 if (err < 0) {
1511                         *error = EXIT_NETWORK;
1512                         return err;
1513                 }
1514         }
1515
1516         if (!strv_isempty(context->read_write_dirs) ||
1517             !strv_isempty(context->read_only_dirs) ||
1518             !strv_isempty(context->inaccessible_dirs) ||
1519             context->mount_flags != 0 ||
1520             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1521             params->bus_endpoint_path ||
1522             context->private_devices ||
1523             context->protect_system != PROTECT_SYSTEM_NO ||
1524             context->protect_home != PROTECT_HOME_NO) {
1525
1526                 char *tmp = NULL, *var = NULL;
1527
1528                 /* The runtime struct only contains the parent
1529                  * of the private /tmp, which is
1530                  * non-accessible to world users. Inside of it
1531                  * there's a /tmp that is sticky, and that's
1532                  * the one we want to use here. */
1533
1534                 if (context->private_tmp && runtime) {
1535                         if (runtime->tmp_dir)
1536                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1537                         if (runtime->var_tmp_dir)
1538                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1539                 }
1540
1541                 err = setup_namespace(
1542                                 context->read_write_dirs,
1543                                 context->read_only_dirs,
1544                                 context->inaccessible_dirs,
1545                                 tmp,
1546                                 var,
1547                                 params->bus_endpoint_path,
1548                                 context->private_devices,
1549                                 context->protect_home,
1550                                 context->protect_system,
1551                                 context->mount_flags);
1552
1553                 if (err == -EPERM)
1554                         log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1555                 else if (err < 0) {
1556                         *error = EXIT_NAMESPACE;
1557                         return err;
1558                 }
1559         }
1560
1561         if (params->apply_chroot) {
1562                 if (context->root_directory)
1563                         if (chroot(context->root_directory) < 0) {
1564                                 *error = EXIT_CHROOT;
1565                                 return -errno;
1566                         }
1567
1568                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1569                         *error = EXIT_CHDIR;
1570                         return -errno;
1571                 }
1572         } else {
1573                 _cleanup_free_ char *d = NULL;
1574
1575                 if (asprintf(&d, "%s/%s",
1576                              context->root_directory ? context->root_directory : "",
1577                              context->working_directory ? context->working_directory : "") < 0) {
1578                         *error = EXIT_MEMORY;
1579                         return -ENOMEM;
1580                 }
1581
1582                 if (chdir(d) < 0) {
1583                         *error = EXIT_CHDIR;
1584                         return -errno;
1585                 }
1586         }
1587
1588 #ifdef HAVE_SELINUX
1589         if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1590                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1591                 if (err < 0) {
1592                         *error = EXIT_SELINUX_CONTEXT;
1593                         return err;
1594                 }
1595         }
1596 #endif
1597
1598         /* We repeat the fd closing here, to make sure that
1599          * nothing is leaked from the PAM modules. Note that
1600          * we are more aggressive this time since socket_fd
1601          * and the netns fds we don't need anymore. The custom
1602          * endpoint fd was needed to upload the policy and can
1603          * now be closed as well. */
1604         err = close_all_fds(fds, n_fds);
1605         if (err >= 0)
1606                 err = shift_fds(fds, n_fds);
1607         if (err >= 0)
1608                 err = flags_fds(fds, n_fds, context->non_blocking);
1609         if (err < 0) {
1610                 *error = EXIT_FDS;
1611                 return err;
1612         }
1613
1614         if (params->apply_permissions) {
1615
1616                 for (i = 0; i < _RLIMIT_MAX; i++) {
1617                         if (!context->rlimit[i])
1618                                 continue;
1619
1620                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1621                                 *error = EXIT_LIMITS;
1622                                 return -errno;
1623                         }
1624                 }
1625
1626                 if (context->capability_bounding_set_drop) {
1627                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1628                         if (err < 0) {
1629                                 *error = EXIT_CAPABILITIES;
1630                                 return err;
1631                         }
1632                 }
1633
1634 #ifdef HAVE_SMACK
1635                 if (context->smack_process_label) {
1636                         err = mac_smack_apply_pid(0, context->smack_process_label);
1637                         if (err < 0) {
1638                                 *error = EXIT_SMACK_PROCESS_LABEL;
1639                                 return err;
1640                         }
1641                 }
1642 #endif
1643
1644                 if (context->user) {
1645                         err = enforce_user(context, uid);
1646                         if (err < 0) {
1647                                 *error = EXIT_USER;
1648                                 return err;
1649                         }
1650                 }
1651
1652                 /* PR_GET_SECUREBITS is not privileged, while
1653                  * PR_SET_SECUREBITS is. So to suppress
1654                  * potential EPERMs we'll try not to call
1655                  * PR_SET_SECUREBITS unless necessary. */
1656                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1657                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1658                                 *error = EXIT_SECUREBITS;
1659                                 return -errno;
1660                         }
1661
1662                 if (context->capabilities)
1663                         if (cap_set_proc(context->capabilities) < 0) {
1664                                 *error = EXIT_CAPABILITIES;
1665                                 return -errno;
1666                         }
1667
1668                 if (context->no_new_privileges)
1669                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1670                                 *error = EXIT_NO_NEW_PRIVILEGES;
1671                                 return -errno;
1672                         }
1673
1674 #ifdef HAVE_SECCOMP
1675                 if (context->address_families_whitelist ||
1676                     !set_isempty(context->address_families)) {
1677                         err = apply_address_families(context);
1678                         if (err < 0) {
1679                                 *error = EXIT_ADDRESS_FAMILIES;
1680                                 return err;
1681                         }
1682                 }
1683
1684                 if (context->syscall_whitelist ||
1685                     !set_isempty(context->syscall_filter) ||
1686                     !set_isempty(context->syscall_archs)) {
1687                         err = apply_seccomp(context);
1688                         if (err < 0) {
1689                                 *error = EXIT_SECCOMP;
1690                                 return err;
1691                         }
1692                 }
1693 #endif
1694
1695 #ifdef HAVE_SELINUX
1696                 if (mac_selinux_use()) {
1697                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1698
1699                         if (exec_context) {
1700                                 err = setexeccon(exec_context);
1701                                 if (err < 0) {
1702                                         *error = EXIT_SELINUX_CONTEXT;
1703                                         return err;
1704                                 }
1705                         }
1706                 }
1707 #endif
1708
1709 #ifdef HAVE_APPARMOR
1710                 if (context->apparmor_profile && mac_apparmor_use()) {
1711                         err = aa_change_onexec(context->apparmor_profile);
1712                         if (err < 0 && !context->apparmor_profile_ignore) {
1713                                 *error = EXIT_APPARMOR_PROFILE;
1714                                 return -errno;
1715                         }
1716                 }
1717 #endif
1718         }
1719
1720         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1721         if (err < 0) {
1722                 *error = EXIT_MEMORY;
1723                 return err;
1724         }
1725
1726         final_env = strv_env_merge(5,
1727                                    params->environment,
1728                                    our_env,
1729                                    context->environment,
1730                                    files_env,
1731                                    pam_env,
1732                                    NULL);
1733         if (!final_env) {
1734                 *error = EXIT_MEMORY;
1735                 return -ENOMEM;
1736         }
1737
1738         final_argv = replace_env_argv(argv, final_env);
1739         if (!final_argv) {
1740                 *error = EXIT_MEMORY;
1741                 return -ENOMEM;
1742         }
1743
1744         final_env = strv_env_clean(final_env);
1745
1746         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1747                 _cleanup_free_ char *line;
1748
1749                 line = exec_command_line(final_argv);
1750                 if (line) {
1751                         log_open();
1752                         log_unit_struct(params->unit_id,
1753                                         LOG_DEBUG,
1754                                         "EXECUTABLE=%s", command->path,
1755                                         LOG_MESSAGE("Executing: %s", line),
1756                                         NULL);
1757                         log_close();
1758                 }
1759         }
1760         execve(command->path, final_argv, final_env);
1761         *error = EXIT_EXEC;
1762         return -errno;
1763 }
1764
1765 int exec_spawn(ExecCommand *command,
1766                const ExecContext *context,
1767                const ExecParameters *params,
1768                ExecRuntime *runtime,
1769                pid_t *ret) {
1770
1771         _cleanup_strv_free_ char **files_env = NULL;
1772         int *fds = NULL; unsigned n_fds = 0;
1773         char *line, **argv;
1774         int socket_fd;
1775         pid_t pid;
1776         int err;
1777
1778         assert(command);
1779         assert(context);
1780         assert(ret);
1781         assert(params);
1782         assert(params->fds || params->n_fds <= 0);
1783
1784         if (context->std_input == EXEC_INPUT_SOCKET ||
1785             context->std_output == EXEC_OUTPUT_SOCKET ||
1786             context->std_error == EXEC_OUTPUT_SOCKET) {
1787
1788                 if (params->n_fds != 1)
1789                         return -EINVAL;
1790
1791                 socket_fd = params->fds[0];
1792         } else {
1793                 socket_fd = -1;
1794                 fds = params->fds;
1795                 n_fds = params->n_fds;
1796         }
1797
1798         err = exec_context_load_environment(context, params->unit_id, &files_env);
1799         if (err < 0) {
1800                 log_unit_struct(params->unit_id,
1801                                 LOG_ERR,
1802                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1803                                 LOG_ERRNO(-err),
1804                                 NULL);
1805                 return err;
1806         }
1807
1808         argv = params->argv ?: command->argv;
1809
1810         line = exec_command_line(argv);
1811         if (!line)
1812                 return log_oom();
1813
1814         log_unit_struct(params->unit_id,
1815                         LOG_DEBUG,
1816                         "EXECUTABLE=%s", command->path,
1817                         LOG_MESSAGE("About to execute: %s", line),
1818                         NULL);
1819         free(line);
1820
1821         pid = fork();
1822         if (pid < 0)
1823                 return -errno;
1824
1825         if (pid == 0) {
1826                 int r;
1827
1828                 err = exec_child(command,
1829                                  context,
1830                                  params,
1831                                  runtime,
1832                                  argv,
1833                                  socket_fd,
1834                                  fds, n_fds,
1835                                  files_env,
1836                                  &r);
1837                 if (r != 0) {
1838                         log_open();
1839                         log_struct(LOG_ERR,
1840                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1841                                    "EXECUTABLE=%s", command->path,
1842                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1843                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1844                                                command->path, strerror(-err)),
1845                                    LOG_ERRNO(-err),
1846                                    NULL);
1847                         log_close();
1848                 }
1849
1850                 _exit(r);
1851         }
1852
1853         log_unit_struct(params->unit_id,
1854                         LOG_DEBUG,
1855                         LOG_MESSAGE("Forked %s as "PID_FMT,
1856                                     command->path, pid),
1857                         NULL);
1858
1859         /* We add the new process to the cgroup both in the child (so
1860          * that we can be sure that no user code is ever executed
1861          * outside of the cgroup) and in the parent (so that we can be
1862          * sure that when we kill the cgroup the process will be
1863          * killed too). */
1864         if (params->cgroup_path)
1865                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1866
1867         exec_status_start(&command->exec_status, pid);
1868
1869         *ret = pid;
1870         return 0;
1871 }
1872
1873 void exec_context_init(ExecContext *c) {
1874         assert(c);
1875
1876         c->umask = 0022;
1877         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1878         c->cpu_sched_policy = SCHED_OTHER;
1879         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1880         c->syslog_level_prefix = true;
1881         c->ignore_sigpipe = true;
1882         c->timer_slack_nsec = NSEC_INFINITY;
1883         c->personality = 0xffffffffUL;
1884         c->runtime_directory_mode = 0755;
1885 }
1886
1887 void exec_context_done(ExecContext *c) {
1888         unsigned l;
1889
1890         assert(c);
1891
1892         strv_free(c->environment);
1893         c->environment = NULL;
1894
1895         strv_free(c->environment_files);
1896         c->environment_files = NULL;
1897
1898         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1899                 free(c->rlimit[l]);
1900                 c->rlimit[l] = NULL;
1901         }
1902
1903         free(c->working_directory);
1904         c->working_directory = NULL;
1905         free(c->root_directory);
1906         c->root_directory = NULL;
1907
1908         free(c->tty_path);
1909         c->tty_path = NULL;
1910
1911         free(c->syslog_identifier);
1912         c->syslog_identifier = NULL;
1913
1914         free(c->user);
1915         c->user = NULL;
1916
1917         free(c->group);
1918         c->group = NULL;
1919
1920         strv_free(c->supplementary_groups);
1921         c->supplementary_groups = NULL;
1922
1923         free(c->pam_name);
1924         c->pam_name = NULL;
1925
1926         if (c->capabilities) {
1927                 cap_free(c->capabilities);
1928                 c->capabilities = NULL;
1929         }
1930
1931         strv_free(c->read_only_dirs);
1932         c->read_only_dirs = NULL;
1933
1934         strv_free(c->read_write_dirs);
1935         c->read_write_dirs = NULL;
1936
1937         strv_free(c->inaccessible_dirs);
1938         c->inaccessible_dirs = NULL;
1939
1940         if (c->cpuset)
1941                 CPU_FREE(c->cpuset);
1942
1943         free(c->utmp_id);
1944         c->utmp_id = NULL;
1945
1946         free(c->selinux_context);
1947         c->selinux_context = NULL;
1948
1949         free(c->apparmor_profile);
1950         c->apparmor_profile = NULL;
1951
1952         set_free(c->syscall_filter);
1953         c->syscall_filter = NULL;
1954
1955         set_free(c->syscall_archs);
1956         c->syscall_archs = NULL;
1957
1958         set_free(c->address_families);
1959         c->address_families = NULL;
1960
1961         strv_free(c->runtime_directory);
1962         c->runtime_directory = NULL;
1963
1964         bus_endpoint_free(c->bus_endpoint);
1965         c->bus_endpoint = NULL;
1966 }
1967
1968 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1969         char **i;
1970
1971         assert(c);
1972
1973         if (!runtime_prefix)
1974                 return 0;
1975
1976         STRV_FOREACH(i, c->runtime_directory) {
1977                 _cleanup_free_ char *p;
1978
1979                 p = strjoin(runtime_prefix, "/", *i, NULL);
1980                 if (!p)
1981                         return -ENOMEM;
1982
1983                 /* We execute this synchronously, since we need to be
1984                  * sure this is gone when we start the service
1985                  * next. */
1986                 rm_rf_dangerous(p, false, true, false);
1987         }
1988
1989         return 0;
1990 }
1991
1992 void exec_command_done(ExecCommand *c) {
1993         assert(c);
1994
1995         free(c->path);
1996         c->path = NULL;
1997
1998         strv_free(c->argv);
1999         c->argv = NULL;
2000 }
2001
2002 void exec_command_done_array(ExecCommand *c, unsigned n) {
2003         unsigned i;
2004
2005         for (i = 0; i < n; i++)
2006                 exec_command_done(c+i);
2007 }
2008
2009 void exec_command_free_list(ExecCommand *c) {
2010         ExecCommand *i;
2011
2012         while ((i = c)) {
2013                 LIST_REMOVE(command, c, i);
2014                 exec_command_done(i);
2015                 free(i);
2016         }
2017 }
2018
2019 void exec_command_free_array(ExecCommand **c, unsigned n) {
2020         unsigned i;
2021
2022         for (i = 0; i < n; i++) {
2023                 exec_command_free_list(c[i]);
2024                 c[i] = NULL;
2025         }
2026 }
2027
2028 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2029         char **i, **r = NULL;
2030
2031         assert(c);
2032         assert(l);
2033
2034         STRV_FOREACH(i, c->environment_files) {
2035                 char *fn;
2036                 int k;
2037                 bool ignore = false;
2038                 char **p;
2039                 _cleanup_globfree_ glob_t pglob = {};
2040                 int count, n;
2041
2042                 fn = *i;
2043
2044                 if (fn[0] == '-') {
2045                         ignore = true;
2046                         fn ++;
2047                 }
2048
2049                 if (!path_is_absolute(fn)) {
2050                         if (ignore)
2051                                 continue;
2052
2053                         strv_free(r);
2054                         return -EINVAL;
2055                 }
2056
2057                 /* Filename supports globbing, take all matching files */
2058                 errno = 0;
2059                 if (glob(fn, 0, NULL, &pglob) != 0) {
2060                         if (ignore)
2061                                 continue;
2062
2063                         strv_free(r);
2064                         return errno ? -errno : -EINVAL;
2065                 }
2066                 count = pglob.gl_pathc;
2067                 if (count == 0) {
2068                         if (ignore)
2069                                 continue;
2070
2071                         strv_free(r);
2072                         return -EINVAL;
2073                 }
2074                 for (n = 0; n < count; n++) {
2075                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2076                         if (k < 0) {
2077                                 if (ignore)
2078                                         continue;
2079
2080                                 strv_free(r);
2081                                 return k;
2082                         }
2083                         /* Log invalid environment variables with filename */
2084                         if (p)
2085                                 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2086
2087                         if (r == NULL)
2088                                 r = p;
2089                         else {
2090                                 char **m;
2091
2092                                 m = strv_env_merge(2, r, p);
2093                                 strv_free(r);
2094                                 strv_free(p);
2095                                 if (!m)
2096                                         return -ENOMEM;
2097
2098                                 r = m;
2099                         }
2100                 }
2101         }
2102
2103         *l = r;
2104
2105         return 0;
2106 }
2107
2108 static bool tty_may_match_dev_console(const char *tty) {
2109         _cleanup_free_ char *active = NULL;
2110        char *console;
2111
2112         if (startswith(tty, "/dev/"))
2113                 tty += 5;
2114
2115         /* trivial identity? */
2116         if (streq(tty, "console"))
2117                 return true;
2118
2119         console = resolve_dev_console(&active);
2120         /* if we could not resolve, assume it may */
2121         if (!console)
2122                 return true;
2123
2124         /* "tty0" means the active VC, so it may be the same sometimes */
2125         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2126 }
2127
2128 bool exec_context_may_touch_console(ExecContext *ec) {
2129         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2130                 is_terminal_input(ec->std_input) ||
2131                 is_terminal_output(ec->std_output) ||
2132                 is_terminal_output(ec->std_error)) &&
2133                tty_may_match_dev_console(tty_path(ec));
2134 }
2135
2136 static void strv_fprintf(FILE *f, char **l) {
2137         char **g;
2138
2139         assert(f);
2140
2141         STRV_FOREACH(g, l)
2142                 fprintf(f, " %s", *g);
2143 }
2144
2145 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2146         char **e;
2147         unsigned i;
2148
2149         assert(c);
2150         assert(f);
2151
2152         prefix = strempty(prefix);
2153
2154         fprintf(f,
2155                 "%sUMask: %04o\n"
2156                 "%sWorkingDirectory: %s\n"
2157                 "%sRootDirectory: %s\n"
2158                 "%sNonBlocking: %s\n"
2159                 "%sPrivateTmp: %s\n"
2160                 "%sPrivateNetwork: %s\n"
2161                 "%sPrivateDevices: %s\n"
2162                 "%sProtectHome: %s\n"
2163                 "%sProtectSystem: %s\n"
2164                 "%sIgnoreSIGPIPE: %s\n",
2165                 prefix, c->umask,
2166                 prefix, c->working_directory ? c->working_directory : "/",
2167                 prefix, c->root_directory ? c->root_directory : "/",
2168                 prefix, yes_no(c->non_blocking),
2169                 prefix, yes_no(c->private_tmp),
2170                 prefix, yes_no(c->private_network),
2171                 prefix, yes_no(c->private_devices),
2172                 prefix, protect_home_to_string(c->protect_home),
2173                 prefix, protect_system_to_string(c->protect_system),
2174                 prefix, yes_no(c->ignore_sigpipe));
2175
2176         STRV_FOREACH(e, c->environment)
2177                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2178
2179         STRV_FOREACH(e, c->environment_files)
2180                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2181
2182         if (c->nice_set)
2183                 fprintf(f,
2184                         "%sNice: %i\n",
2185                         prefix, c->nice);
2186
2187         if (c->oom_score_adjust_set)
2188                 fprintf(f,
2189                         "%sOOMScoreAdjust: %i\n",
2190                         prefix, c->oom_score_adjust);
2191
2192         for (i = 0; i < RLIM_NLIMITS; i++)
2193                 if (c->rlimit[i])
2194                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2195                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2196
2197         if (c->ioprio_set) {
2198                 _cleanup_free_ char *class_str = NULL;
2199
2200                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2201                 fprintf(f,
2202                         "%sIOSchedulingClass: %s\n"
2203                         "%sIOPriority: %i\n",
2204                         prefix, strna(class_str),
2205                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2206         }
2207
2208         if (c->cpu_sched_set) {
2209                 _cleanup_free_ char *policy_str = NULL;
2210
2211                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2212                 fprintf(f,
2213                         "%sCPUSchedulingPolicy: %s\n"
2214                         "%sCPUSchedulingPriority: %i\n"
2215                         "%sCPUSchedulingResetOnFork: %s\n",
2216                         prefix, strna(policy_str),
2217                         prefix, c->cpu_sched_priority,
2218                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2219         }
2220
2221         if (c->cpuset) {
2222                 fprintf(f, "%sCPUAffinity:", prefix);
2223                 for (i = 0; i < c->cpuset_ncpus; i++)
2224                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2225                                 fprintf(f, " %u", i);
2226                 fputs("\n", f);
2227         }
2228
2229         if (c->timer_slack_nsec != NSEC_INFINITY)
2230                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2231
2232         fprintf(f,
2233                 "%sStandardInput: %s\n"
2234                 "%sStandardOutput: %s\n"
2235                 "%sStandardError: %s\n",
2236                 prefix, exec_input_to_string(c->std_input),
2237                 prefix, exec_output_to_string(c->std_output),
2238                 prefix, exec_output_to_string(c->std_error));
2239
2240         if (c->tty_path)
2241                 fprintf(f,
2242                         "%sTTYPath: %s\n"
2243                         "%sTTYReset: %s\n"
2244                         "%sTTYVHangup: %s\n"
2245                         "%sTTYVTDisallocate: %s\n",
2246                         prefix, c->tty_path,
2247                         prefix, yes_no(c->tty_reset),
2248                         prefix, yes_no(c->tty_vhangup),
2249                         prefix, yes_no(c->tty_vt_disallocate));
2250
2251         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2252             c->std_output == EXEC_OUTPUT_KMSG ||
2253             c->std_output == EXEC_OUTPUT_JOURNAL ||
2254             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2255             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2256             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2257             c->std_error == EXEC_OUTPUT_SYSLOG ||
2258             c->std_error == EXEC_OUTPUT_KMSG ||
2259             c->std_error == EXEC_OUTPUT_JOURNAL ||
2260             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2261             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2262             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2263
2264                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2265
2266                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2267                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2268
2269                 fprintf(f,
2270                         "%sSyslogFacility: %s\n"
2271                         "%sSyslogLevel: %s\n",
2272                         prefix, strna(fac_str),
2273                         prefix, strna(lvl_str));
2274         }
2275
2276         if (c->capabilities) {
2277                 _cleanup_cap_free_charp_ char *t;
2278
2279                 t = cap_to_text(c->capabilities, NULL);
2280                 if (t)
2281                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2282         }
2283
2284         if (c->secure_bits)
2285                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2286                         prefix,
2287                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2288                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2289                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2290                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2291                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2292                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2293
2294         if (c->capability_bounding_set_drop) {
2295                 unsigned long l;
2296                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2297
2298                 for (l = 0; l <= cap_last_cap(); l++)
2299                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2300                                 _cleanup_cap_free_charp_ char *t;
2301
2302                                 t = cap_to_name(l);
2303                                 if (t)
2304                                         fprintf(f, " %s", t);
2305                         }
2306
2307                 fputs("\n", f);
2308         }
2309
2310         if (c->user)
2311                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2312         if (c->group)
2313                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2314
2315         if (strv_length(c->supplementary_groups) > 0) {
2316                 fprintf(f, "%sSupplementaryGroups:", prefix);
2317                 strv_fprintf(f, c->supplementary_groups);
2318                 fputs("\n", f);
2319         }
2320
2321         if (c->pam_name)
2322                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2323
2324         if (strv_length(c->read_write_dirs) > 0) {
2325                 fprintf(f, "%sReadWriteDirs:", prefix);
2326                 strv_fprintf(f, c->read_write_dirs);
2327                 fputs("\n", f);
2328         }
2329
2330         if (strv_length(c->read_only_dirs) > 0) {
2331                 fprintf(f, "%sReadOnlyDirs:", prefix);
2332                 strv_fprintf(f, c->read_only_dirs);
2333                 fputs("\n", f);
2334         }
2335
2336         if (strv_length(c->inaccessible_dirs) > 0) {
2337                 fprintf(f, "%sInaccessibleDirs:", prefix);
2338                 strv_fprintf(f, c->inaccessible_dirs);
2339                 fputs("\n", f);
2340         }
2341
2342         if (c->utmp_id)
2343                 fprintf(f,
2344                         "%sUtmpIdentifier: %s\n",
2345                         prefix, c->utmp_id);
2346
2347         if (c->selinux_context)
2348                 fprintf(f,
2349                         "%sSELinuxContext: %s%s\n",
2350                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2351
2352         if (c->personality != 0xffffffffUL)
2353                 fprintf(f,
2354                         "%sPersonality: %s\n",
2355                         prefix, strna(personality_to_string(c->personality)));
2356
2357         if (c->syscall_filter) {
2358 #ifdef HAVE_SECCOMP
2359                 Iterator j;
2360                 void *id;
2361                 bool first = true;
2362 #endif
2363
2364                 fprintf(f,
2365                         "%sSystemCallFilter: ",
2366                         prefix);
2367
2368                 if (!c->syscall_whitelist)
2369                         fputc('~', f);
2370
2371 #ifdef HAVE_SECCOMP
2372                 SET_FOREACH(id, c->syscall_filter, j) {
2373                         _cleanup_free_ char *name = NULL;
2374
2375                         if (first)
2376                                 first = false;
2377                         else
2378                                 fputc(' ', f);
2379
2380                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2381                         fputs(strna(name), f);
2382                 }
2383 #endif
2384
2385                 fputc('\n', f);
2386         }
2387
2388         if (c->syscall_archs) {
2389 #ifdef HAVE_SECCOMP
2390                 Iterator j;
2391                 void *id;
2392 #endif
2393
2394                 fprintf(f,
2395                         "%sSystemCallArchitectures:",
2396                         prefix);
2397
2398 #ifdef HAVE_SECCOMP
2399                 SET_FOREACH(id, c->syscall_archs, j)
2400                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2401 #endif
2402                 fputc('\n', f);
2403         }
2404
2405         if (c->syscall_errno != 0)
2406                 fprintf(f,
2407                         "%sSystemCallErrorNumber: %s\n",
2408                         prefix, strna(errno_to_name(c->syscall_errno)));
2409
2410         if (c->apparmor_profile)
2411                 fprintf(f,
2412                         "%sAppArmorProfile: %s%s\n",
2413                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2414 }
2415
2416 bool exec_context_maintains_privileges(ExecContext *c) {
2417         assert(c);
2418
2419         /* Returns true if the process forked off would run run under
2420          * an unchanged UID or as root. */
2421
2422         if (!c->user)
2423                 return true;
2424
2425         if (streq(c->user, "root") || streq(c->user, "0"))
2426                 return true;
2427
2428         return false;
2429 }
2430
2431 void exec_status_start(ExecStatus *s, pid_t pid) {
2432         assert(s);
2433
2434         zero(*s);
2435         s->pid = pid;
2436         dual_timestamp_get(&s->start_timestamp);
2437 }
2438
2439 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2440         assert(s);
2441
2442         if (s->pid && s->pid != pid)
2443                 zero(*s);
2444
2445         s->pid = pid;
2446         dual_timestamp_get(&s->exit_timestamp);
2447
2448         s->code = code;
2449         s->status = status;
2450
2451         if (context) {
2452                 if (context->utmp_id)
2453                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2454
2455                 exec_context_tty_reset(context);
2456         }
2457 }
2458
2459 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2460         char buf[FORMAT_TIMESTAMP_MAX];
2461
2462         assert(s);
2463         assert(f);
2464
2465         if (s->pid <= 0)
2466                 return;
2467
2468         prefix = strempty(prefix);
2469
2470         fprintf(f,
2471                 "%sPID: "PID_FMT"\n",
2472                 prefix, s->pid);
2473
2474         if (s->start_timestamp.realtime > 0)
2475                 fprintf(f,
2476                         "%sStart Timestamp: %s\n",
2477                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2478
2479         if (s->exit_timestamp.realtime > 0)
2480                 fprintf(f,
2481                         "%sExit Timestamp: %s\n"
2482                         "%sExit Code: %s\n"
2483                         "%sExit Status: %i\n",
2484                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2485                         prefix, sigchld_code_to_string(s->code),
2486                         prefix, s->status);
2487 }
2488
2489 char *exec_command_line(char **argv) {
2490         size_t k;
2491         char *n, *p, **a;
2492         bool first = true;
2493
2494         assert(argv);
2495
2496         k = 1;
2497         STRV_FOREACH(a, argv)
2498                 k += strlen(*a)+3;
2499
2500         if (!(n = new(char, k)))
2501                 return NULL;
2502
2503         p = n;
2504         STRV_FOREACH(a, argv) {
2505
2506                 if (!first)
2507                         *(p++) = ' ';
2508                 else
2509                         first = false;
2510
2511                 if (strpbrk(*a, WHITESPACE)) {
2512                         *(p++) = '\'';
2513                         p = stpcpy(p, *a);
2514                         *(p++) = '\'';
2515                 } else
2516                         p = stpcpy(p, *a);
2517
2518         }
2519
2520         *p = 0;
2521
2522         /* FIXME: this doesn't really handle arguments that have
2523          * spaces and ticks in them */
2524
2525         return n;
2526 }
2527
2528 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2529         _cleanup_free_ char *cmd = NULL;
2530         const char *prefix2;
2531
2532         assert(c);
2533         assert(f);
2534
2535         prefix = strempty(prefix);
2536         prefix2 = strappenda(prefix, "\t");
2537
2538         cmd = exec_command_line(c->argv);
2539         fprintf(f,
2540                 "%sCommand Line: %s\n",
2541                 prefix, cmd ? cmd : strerror(ENOMEM));
2542
2543         exec_status_dump(&c->exec_status, f, prefix2);
2544 }
2545
2546 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2547         assert(f);
2548
2549         prefix = strempty(prefix);
2550
2551         LIST_FOREACH(command, c, c)
2552                 exec_command_dump(c, f, prefix);
2553 }
2554
2555 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2556         ExecCommand *end;
2557
2558         assert(l);
2559         assert(e);
2560
2561         if (*l) {
2562                 /* It's kind of important, that we keep the order here */
2563                 LIST_FIND_TAIL(command, *l, end);
2564                 LIST_INSERT_AFTER(command, *l, end, e);
2565         } else
2566               *l = e;
2567 }
2568
2569 int exec_command_set(ExecCommand *c, const char *path, ...) {
2570         va_list ap;
2571         char **l, *p;
2572
2573         assert(c);
2574         assert(path);
2575
2576         va_start(ap, path);
2577         l = strv_new_ap(path, ap);
2578         va_end(ap);
2579
2580         if (!l)
2581                 return -ENOMEM;
2582
2583         p = strdup(path);
2584         if (!p) {
2585                 strv_free(l);
2586                 return -ENOMEM;
2587         }
2588
2589         free(c->path);
2590         c->path = p;
2591
2592         strv_free(c->argv);
2593         c->argv = l;
2594
2595         return 0;
2596 }
2597
2598 int exec_command_append(ExecCommand *c, const char *path, ...) {
2599         _cleanup_strv_free_ char **l = NULL;
2600         va_list ap;
2601         int r;
2602
2603         assert(c);
2604         assert(path);
2605
2606         va_start(ap, path);
2607         l = strv_new_ap(path, ap);
2608         va_end(ap);
2609
2610         if (!l)
2611                 return -ENOMEM;
2612
2613         r = strv_extend_strv(&c->argv, l);
2614         if (r < 0)
2615                 return r;
2616
2617         return 0;
2618 }
2619
2620
2621 static int exec_runtime_allocate(ExecRuntime **rt) {
2622
2623         if (*rt)
2624                 return 0;
2625
2626         *rt = new0(ExecRuntime, 1);
2627         if (!*rt)
2628                 return -ENOMEM;
2629
2630         (*rt)->n_ref = 1;
2631         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2632
2633         return 0;
2634 }
2635
2636 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2637         int r;
2638
2639         assert(rt);
2640         assert(c);
2641         assert(id);
2642
2643         if (*rt)
2644                 return 1;
2645
2646         if (!c->private_network && !c->private_tmp)
2647                 return 0;
2648
2649         r = exec_runtime_allocate(rt);
2650         if (r < 0)
2651                 return r;
2652
2653         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2654                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2655                         return -errno;
2656         }
2657
2658         if (c->private_tmp && !(*rt)->tmp_dir) {
2659                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2660                 if (r < 0)
2661                         return r;
2662         }
2663
2664         return 1;
2665 }
2666
2667 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2668         assert(r);
2669         assert(r->n_ref > 0);
2670
2671         r->n_ref++;
2672         return r;
2673 }
2674
2675 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2676
2677         if (!r)
2678                 return NULL;
2679
2680         assert(r->n_ref > 0);
2681
2682         r->n_ref--;
2683         if (r->n_ref <= 0) {
2684                 free(r->tmp_dir);
2685                 free(r->var_tmp_dir);
2686                 safe_close_pair(r->netns_storage_socket);
2687                 free(r);
2688         }
2689
2690         return NULL;
2691 }
2692
2693 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2694         assert(u);
2695         assert(f);
2696         assert(fds);
2697
2698         if (!rt)
2699                 return 0;
2700
2701         if (rt->tmp_dir)
2702                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2703
2704         if (rt->var_tmp_dir)
2705                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2706
2707         if (rt->netns_storage_socket[0] >= 0) {
2708                 int copy;
2709
2710                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2711                 if (copy < 0)
2712                         return copy;
2713
2714                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2715         }
2716
2717         if (rt->netns_storage_socket[1] >= 0) {
2718                 int copy;
2719
2720                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2721                 if (copy < 0)
2722                         return copy;
2723
2724                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2725         }
2726
2727         return 0;
2728 }
2729
2730 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2731         int r;
2732
2733         assert(rt);
2734         assert(key);
2735         assert(value);
2736
2737         if (streq(key, "tmp-dir")) {
2738                 char *copy;
2739
2740                 r = exec_runtime_allocate(rt);
2741                 if (r < 0)
2742                         return r;
2743
2744                 copy = strdup(value);
2745                 if (!copy)
2746                         return log_oom();
2747
2748                 free((*rt)->tmp_dir);
2749                 (*rt)->tmp_dir = copy;
2750
2751         } else if (streq(key, "var-tmp-dir")) {
2752                 char *copy;
2753
2754                 r = exec_runtime_allocate(rt);
2755                 if (r < 0)
2756                         return r;
2757
2758                 copy = strdup(value);
2759                 if (!copy)
2760                         return log_oom();
2761
2762                 free((*rt)->var_tmp_dir);
2763                 (*rt)->var_tmp_dir = copy;
2764
2765         } else if (streq(key, "netns-socket-0")) {
2766                 int fd;
2767
2768                 r = exec_runtime_allocate(rt);
2769                 if (r < 0)
2770                         return r;
2771
2772                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2773                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2774                 else {
2775                         safe_close((*rt)->netns_storage_socket[0]);
2776                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2777                 }
2778         } else if (streq(key, "netns-socket-1")) {
2779                 int fd;
2780
2781                 r = exec_runtime_allocate(rt);
2782                 if (r < 0)
2783                         return r;
2784
2785                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2786                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2787                 else {
2788                         safe_close((*rt)->netns_storage_socket[1]);
2789                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2790                 }
2791         } else
2792                 return 0;
2793
2794         return 1;
2795 }
2796
/* Thread entry point: recursively removes the directory whose path is passed
 * in 'p' and then frees the path string, i.e. ownership of the string is
 * taken over from whoever started the thread. Always returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *doomed = p;

        rm_rf_dangerous(doomed, false, true, false);
        free(doomed);

        return NULL;
}
2803
2804 void exec_runtime_destroy(ExecRuntime *rt) {
2805         int r;
2806
2807         if (!rt)
2808                 return;
2809
2810         /* If there are multiple users of this, let's leave the stuff around */
2811         if (rt->n_ref > 1)
2812                 return;
2813
2814         if (rt->tmp_dir) {
2815                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2816
2817                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2818                 if (r < 0) {
2819                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2820                         free(rt->tmp_dir);
2821                 }
2822
2823                 rt->tmp_dir = NULL;
2824         }
2825
2826         if (rt->var_tmp_dir) {
2827                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2828
2829                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2830                 if (r < 0) {
2831                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2832                         free(rt->var_tmp_dir);
2833                 }
2834
2835                 rt->var_tmp_dir = NULL;
2836         }
2837
2838         safe_close_pair(rt->netns_storage_socket);
2839 }
2840
2841 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2842         [EXEC_INPUT_NULL] = "null",
2843         [EXEC_INPUT_TTY] = "tty",
2844         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2845         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2846         [EXEC_INPUT_SOCKET] = "socket"
2847 };
2848
2849 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2850
2851 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2852         [EXEC_OUTPUT_INHERIT] = "inherit",
2853         [EXEC_OUTPUT_NULL] = "null",
2854         [EXEC_OUTPUT_TTY] = "tty",
2855         [EXEC_OUTPUT_SYSLOG] = "syslog",
2856         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2857         [EXEC_OUTPUT_KMSG] = "kmsg",
2858         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2859         [EXEC_OUTPUT_JOURNAL] = "journal",
2860         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2861         [EXEC_OUTPUT_SOCKET] = "socket"
2862 };
2863
2864 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);