chiark / gitweb /
log: fix order of log_unit_struct() to match other logging calls
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
88 #include "label.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96
97 /* This assumes there is a 'tty' group */
98 #define TTY_MODE 0620
99
100 #define SNDBUF_SIZE (8*1024*1024)
101
/* Moves the n_fds descriptors in fds[] so that entry k ends up at fd number
 * k+3, i.e. directly after stdin/stdout/stderr. The array is rewritten in
 * place. Returns 0 on success or a negative errno if duplicating fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        /* Note: the caller's array is modified below. */
        assert(fds);

        while (first >= 0) {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3;
                        int dup_fd;

                        /* Nothing to do if this one already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still taken, so we got a
                         * higher number. Remember the first such index and
                         * make another pass starting there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        }

        return 0;
}
145
/* Applies the requested O_NONBLOCK state to every fd in fds[] and clears
 * FD_CLOEXEC on each of them. Stops at the first failure and returns the
 * error reported by the helper; returns 0 otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int q;

                q = fd_nonblock(fds[k], nonblock);
                if (q < 0)
                        return q;

                /* FD_CLOEXEC is always dropped: these fds are meant to be
                 * handed over to our children. */
                q = fd_cloexec(fds[k], false);
                if (q < 0)
                        return q;
        }

        return 0;
}
172
173 _pure_ static const char *tty_path(const ExecContext *context) {
174         assert(context);
175
176         if (context->tty_path)
177                 return context->tty_path;
178
179         return "/dev/console";
180 }
181
182 static void exec_context_tty_reset(const ExecContext *context) {
183         assert(context);
184
185         if (context->tty_vhangup)
186                 terminal_vhangup(tty_path(context));
187
188         if (context->tty_reset)
189                 reset_terminal(tty_path(context));
190
191         if (context->tty_vt_disallocate && context->tty_path)
192                 vt_disallocate(context->tty_path);
193 }
194
195 static bool is_terminal_output(ExecOutput o) {
196         return
197                 o == EXEC_OUTPUT_TTY ||
198                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
201 }
202
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it available
 * as file descriptor nfd. Returns nfd on success, a negative errno on
 * failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(null_fd);

        return ret;
}
220
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
222         int fd, r;
223         union sockaddr_union sa = {
224                 .un.sun_family = AF_UNIX,
225                 .un.sun_path = "/run/systemd/journal/stdout",
226         };
227
228         assert(context);
229         assert(output < _EXEC_OUTPUT_MAX);
230         assert(ident);
231         assert(nfd >= 0);
232
233         fd = socket(AF_UNIX, SOCK_STREAM, 0);
234         if (fd < 0)
235                 return -errno;
236
237         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
238         if (r < 0) {
239                 safe_close(fd);
240                 return -errno;
241         }
242
243         if (shutdown(fd, SHUT_RD) < 0) {
244                 safe_close(fd);
245                 return -errno;
246         }
247
248         fd_inc_sndbuf(fd, SNDBUF_SIZE);
249
250         dprintf(fd,
251                 "%s\n"
252                 "%s\n"
253                 "%i\n"
254                 "%i\n"
255                 "%i\n"
256                 "%i\n"
257                 "%i\n",
258                 context->syslog_identifier ? context->syslog_identifier : ident,
259                 unit_id,
260                 context->syslog_priority,
261                 !!context->syslog_level_prefix,
262                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264                 is_terminal_output(output));
265
266         if (fd != nfd) {
267                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268                 safe_close(fd);
269         } else
270                 r = nfd;
271
272         return r;
273 }
/* Opens the terminal at path (with O_NOCTTY added to mode) and makes it
 * available as file descriptor nfd. Returns nfd on success; on failure
 * returns the error from open_terminal() or a negative errno from dup2(). */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int term_fd, ret;

        assert(path);
        assert(nfd >= 0);

        term_fd = open_terminal(path, mode | O_NOCTTY);
        if (term_fd < 0)
                return term_fd;

        if (term_fd == nfd)
                return nfd;

        if (dup2(term_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        safe_close(term_fd);

        return ret;
}
291
292 static bool is_terminal_input(ExecInput i) {
293         return
294                 i == EXEC_INPUT_TTY ||
295                 i == EXEC_INPUT_TTY_FORCE ||
296                 i == EXEC_INPUT_TTY_FAIL;
297 }
298
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
300
301         if (is_terminal_input(std_input) && !apply_tty_stdin)
302                 return EXEC_INPUT_NULL;
303
304         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305                 return EXEC_INPUT_NULL;
306
307         return std_input;
308 }
309
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
311
312         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313                 return EXEC_OUTPUT_INHERIT;
314
315         return std_output;
316 }
317
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
319         ExecInput i;
320
321         assert(context);
322
323         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324
325         switch (i) {
326
327         case EXEC_INPUT_NULL:
328                 return open_null_as(O_RDONLY, STDIN_FILENO);
329
330         case EXEC_INPUT_TTY:
331         case EXEC_INPUT_TTY_FORCE:
332         case EXEC_INPUT_TTY_FAIL: {
333                 int fd, r;
334
335                 fd = acquire_terminal(tty_path(context),
336                                       i == EXEC_INPUT_TTY_FAIL,
337                                       i == EXEC_INPUT_TTY_FORCE,
338                                       false,
339                                       USEC_INFINITY);
340                 if (fd < 0)
341                         return fd;
342
343                 if (fd != STDIN_FILENO) {
344                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
345                         safe_close(fd);
346                 } else
347                         r = STDIN_FILENO;
348
349                 return r;
350         }
351
352         case EXEC_INPUT_SOCKET:
353                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
354
355         default:
356                 assert_not_reached("Unknown input type");
357         }
358 }
359
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
361         ExecOutput o;
362         ExecInput i;
363         int r;
364
365         assert(context);
366         assert(ident);
367
368         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369         o = fixup_output(context->std_output, socket_fd);
370
371         if (fileno == STDERR_FILENO) {
372                 ExecOutput e;
373                 e = fixup_output(context->std_error, socket_fd);
374
375                 /* This expects the input and output are already set up */
376
377                 /* Don't change the stderr file descriptor if we inherit all
378                  * the way and are not on a tty */
379                 if (e == EXEC_OUTPUT_INHERIT &&
380                     o == EXEC_OUTPUT_INHERIT &&
381                     i == EXEC_INPUT_NULL &&
382                     !is_terminal_input(context->std_input) &&
383                     getppid () != 1)
384                         return fileno;
385
386                 /* Duplicate from stdout if possible */
387                 if (e == o || e == EXEC_OUTPUT_INHERIT)
388                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389
390                 o = e;
391
392         } else if (o == EXEC_OUTPUT_INHERIT) {
393                 /* If input got downgraded, inherit the original value */
394                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
396
397                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398                 if (i != EXEC_INPUT_NULL)
399                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
400
401                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402                 if (getppid() != 1)
403                         return fileno;
404
405                 /* We need to open /dev/null here anew, to get the right access mode. */
406                 return open_null_as(O_WRONLY, fileno);
407         }
408
409         switch (o) {
410
411         case EXEC_OUTPUT_NULL:
412                 return open_null_as(O_WRONLY, fileno);
413
414         case EXEC_OUTPUT_TTY:
415                 if (is_terminal_input(i))
416                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
417
418                 /* We don't reset the terminal if this is just about output */
419                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
420
421         case EXEC_OUTPUT_SYSLOG:
422         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423         case EXEC_OUTPUT_KMSG:
424         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425         case EXEC_OUTPUT_JOURNAL:
426         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427                 r = connect_logger_as(context, o, ident, unit_id, fileno);
428                 if (r < 0) {
429                         log_unit_struct(unit_id,
430                                         LOG_CRIT,
431                                         LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
432                                                     fileno == STDOUT_FILENO ? "stdout" : "stderr",
433                                                     unit_id, strerror(-r)),
434                                         LOG_ERRNO(-r),
435                                         NULL);
436                         r = open_null_as(O_WRONLY, fileno);
437                 }
438                 return r;
439
440         case EXEC_OUTPUT_SOCKET:
441                 assert(socket_fd >= 0);
442                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
443
444         default:
445                 assert_not_reached("Unknown error type");
446         }
447 }
448
449 static int chown_terminal(int fd, uid_t uid) {
450         struct stat st;
451
452         assert(fd >= 0);
453
454         /* This might fail. What matters are the results. */
455         (void) fchown(fd, uid, -1);
456         (void) fchmod(fd, TTY_MODE);
457
458         if (fstat(fd, &st) < 0)
459                 return -errno;
460
461         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
462                 return -EPERM;
463
464         return 0;
465 }
466
467 static int setup_confirm_stdio(int *_saved_stdin,
468                                int *_saved_stdout) {
469         int fd = -1, saved_stdin, saved_stdout = -1, r;
470
471         assert(_saved_stdin);
472         assert(_saved_stdout);
473
474         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
475         if (saved_stdin < 0)
476                 return -errno;
477
478         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
479         if (saved_stdout < 0) {
480                 r = errno;
481                 goto fail;
482         }
483
484         fd = acquire_terminal(
485                         "/dev/console",
486                         false,
487                         false,
488                         false,
489                         DEFAULT_CONFIRM_USEC);
490         if (fd < 0) {
491                 r = fd;
492                 goto fail;
493         }
494
495         r = chown_terminal(fd, getuid());
496         if (r < 0)
497                 goto fail;
498
499         if (dup2(fd, STDIN_FILENO) < 0) {
500                 r = -errno;
501                 goto fail;
502         }
503
504         if (dup2(fd, STDOUT_FILENO) < 0) {
505                 r = -errno;
506                 goto fail;
507         }
508
509         if (fd >= 2)
510                 safe_close(fd);
511
512         *_saved_stdin = saved_stdin;
513         *_saved_stdout = saved_stdout;
514
515         return 0;
516
517 fail:
518         safe_close(saved_stdout);
519         safe_close(saved_stdin);
520         safe_close(fd);
521
522         return r;
523 }
524
525 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
526         _cleanup_close_ int fd = -1;
527         va_list ap;
528
529         assert(format);
530
531         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
532         if (fd < 0)
533                 return fd;
534
535         va_start(ap, format);
536         vdprintf(fd, format, ap);
537         va_end(ap);
538
539         return 0;
540 }
541
/* Releases the terminal and puts the saved stdin/stdout descriptors back in
 * place, closing the saved copies afterwards. Both restores are attempted
 * even if the first fails; returns 0 on success or the negative errno of the
 * last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
565
566 static int ask_for_confirmation(char *response, char **argv) {
567         int saved_stdout = -1, saved_stdin = -1, r;
568         _cleanup_free_ char *line = NULL;
569
570         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
571         if (r < 0)
572                 return r;
573
574         line = exec_command_line(argv);
575         if (!line)
576                 return -ENOMEM;
577
578         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
579
580         restore_confirm_stdio(&saved_stdin, &saved_stdout);
581
582         return r;
583 }
584
585 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
586         bool keep_groups = false;
587         int r;
588
589         assert(context);
590
591         /* Lookup and set GID and supplementary group list. Here too
592          * we avoid NSS lookups for gid=0. */
593
594         if (context->group || username) {
595
596                 if (context->group) {
597                         const char *g = context->group;
598
599                         if ((r = get_group_creds(&g, &gid)) < 0)
600                                 return r;
601                 }
602
603                 /* First step, initialize groups from /etc/groups */
604                 if (username && gid != 0) {
605                         if (initgroups(username, gid) < 0)
606                                 return -errno;
607
608                         keep_groups = true;
609                 }
610
611                 /* Second step, set our gids */
612                 if (setresgid(gid, gid, gid) < 0)
613                         return -errno;
614         }
615
616         if (context->supplementary_groups) {
617                 int ngroups_max, k;
618                 gid_t *gids;
619                 char **i;
620
621                 /* Final step, initialize any manually set supplementary groups */
622                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
623
624                 if (!(gids = new(gid_t, ngroups_max)))
625                         return -ENOMEM;
626
627                 if (keep_groups) {
628                         if ((k = getgroups(ngroups_max, gids)) < 0) {
629                                 free(gids);
630                                 return -errno;
631                         }
632                 } else
633                         k = 0;
634
635                 STRV_FOREACH(i, context->supplementary_groups) {
636                         const char *g;
637
638                         if (k >= ngroups_max) {
639                                 free(gids);
640                                 return -E2BIG;
641                         }
642
643                         g = *i;
644                         r = get_group_creds(&g, gids+k);
645                         if (r < 0) {
646                                 free(gids);
647                                 return r;
648                         }
649
650                         k++;
651                 }
652
653                 if (setgroups(k, gids) < 0) {
654                         free(gids);
655                         return -errno;
656                 }
657
658                 free(gids);
659         }
660
661         return 0;
662 }
663
664 static int enforce_user(const ExecContext *context, uid_t uid) {
665         assert(context);
666
667         /* Sets (but doesn't lookup) the uid and make sure we keep the
668          * capabilities while doing so. */
669
670         if (context->capabilities) {
671                 _cleanup_cap_free_ cap_t d = NULL;
672                 static const cap_value_t bits[] = {
673                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
674                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
675                 };
676
677                 /* First step: If we need to keep capabilities but
678                  * drop privileges we need to make sure we keep our
679                  * caps, while we drop privileges. */
680                 if (uid != 0) {
681                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
682
683                         if (prctl(PR_GET_SECUREBITS) != sb)
684                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685                                         return -errno;
686                 }
687
688                 /* Second step: set the capabilities. This will reduce
689                  * the capabilities to the minimum we need. */
690
691                 d = cap_dup(context->capabilities);
692                 if (!d)
693                         return -errno;
694
695                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
696                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
697                         return -errno;
698
699                 if (cap_set_proc(d) < 0)
700                         return -errno;
701         }
702
703         /* Third step: actually set the uids */
704         if (setresuid(uid, uid, uid) < 0)
705                 return -errno;
706
707         /* At this point we should have all necessary capabilities but
708            are otherwise a normal user. However, the caps might got
709            corrupted due to the setresuid() so we need clean them up
710            later. This is done outside of this call. */
711
712         return 0;
713 }
714
715 #ifdef HAVE_PAM
716
717 static int null_conv(
718                 int num_msg,
719                 const struct pam_message **msg,
720                 struct pam_response **resp,
721                 void *appdata_ptr) {
722
723         /* We don't support conversations */
724
725         return PAM_CONV_ERR;
726 }
727
728 static int setup_pam(
729                 const char *name,
730                 const char *user,
731                 uid_t uid,
732                 const char *tty,
733                 char ***pam_env,
734                 int fds[], unsigned n_fds) {
735
736         static const struct pam_conv conv = {
737                 .conv = null_conv,
738                 .appdata_ptr = NULL
739         };
740
741         pam_handle_t *handle = NULL;
742         sigset_t ss, old_ss;
743         int pam_code = PAM_SUCCESS;
744         int err;
745         char **e = NULL;
746         bool close_session = false;
747         pid_t pam_pid = 0, parent_pid;
748         int flags = 0;
749
750         assert(name);
751         assert(user);
752         assert(pam_env);
753
754         /* We set up PAM in the parent process, then fork. The child
755          * will then stay around until killed via PR_GET_PDEATHSIG or
756          * systemd via the cgroup logic. It will then remove the PAM
757          * session again. The parent process will exec() the actual
758          * daemon. We do things this way to ensure that the main PID
759          * of the daemon is the one we initially fork()ed. */
760
761         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
762                 flags |= PAM_SILENT;
763
764         pam_code = pam_start(name, user, &conv, &handle);
765         if (pam_code != PAM_SUCCESS) {
766                 handle = NULL;
767                 goto fail;
768         }
769
770         if (tty) {
771                 pam_code = pam_set_item(handle, PAM_TTY, tty);
772                 if (pam_code != PAM_SUCCESS)
773                         goto fail;
774         }
775
776         pam_code = pam_acct_mgmt(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         pam_code = pam_open_session(handle, flags);
781         if (pam_code != PAM_SUCCESS)
782                 goto fail;
783
784         close_session = true;
785
786         e = pam_getenvlist(handle);
787         if (!e) {
788                 pam_code = PAM_BUF_ERR;
789                 goto fail;
790         }
791
792         /* Block SIGTERM, so that we know that it won't get lost in
793          * the child */
794         if (sigemptyset(&ss) < 0 ||
795             sigaddset(&ss, SIGTERM) < 0 ||
796             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
797                 goto fail;
798
799         parent_pid = getpid();
800
801         pam_pid = fork();
802         if (pam_pid < 0)
803                 goto fail;
804
805         if (pam_pid == 0) {
806                 int sig;
807                 int r = EXIT_PAM;
808
809                 /* The child's job is to reset the PAM session on
810                  * termination */
811
812                 /* This string must fit in 10 chars (i.e. the length
813                  * of "/sbin/init"), to look pretty in /bin/ps */
814                 rename_process("(sd-pam)");
815
816                 /* Make sure we don't keep open the passed fds in this
817                 child. We assume that otherwise only those fds are
818                 open here that have been opened by PAM. */
819                 close_many(fds, n_fds);
820
821                 /* Drop privileges - we don't need any to pam_close_session
822                  * and this will make PR_SET_PDEATHSIG work in most cases.
823                  * If this fails, ignore the error - but expect sd-pam threads
824                  * to fail to exit normally */
825                 if (setresuid(uid, uid, uid) < 0)
826                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
827
828                 /* Wait until our parent died. This will only work if
829                  * the above setresuid() succeeds, otherwise the kernel
830                  * will not allow unprivileged parents kill their privileged
831                  * children this way. We rely on the control groups kill logic
832                  * to do the rest for us. */
833                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
834                         goto child_finish;
835
836                 /* Check if our parent process might already have
837                  * died? */
838                 if (getppid() == parent_pid) {
839                         for (;;) {
840                                 if (sigwait(&ss, &sig) < 0) {
841                                         if (errno == EINTR)
842                                                 continue;
843
844                                         goto child_finish;
845                                 }
846
847                                 assert(sig == SIGTERM);
848                                 break;
849                         }
850                 }
851
852                 /* If our parent died we'll end the session */
853                 if (getppid() != parent_pid) {
854                         pam_code = pam_close_session(handle, flags);
855                         if (pam_code != PAM_SUCCESS)
856                                 goto child_finish;
857                 }
858
859                 r = 0;
860
861         child_finish:
862                 pam_end(handle, pam_code | flags);
863                 _exit(r);
864         }
865
866         /* If the child was forked off successfully it will do all the
867          * cleanups, so forget about the handle here. */
868         handle = NULL;
869
870         /* Unblock SIGTERM again in the parent */
871         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
872                 goto fail;
873
874         /* We close the log explicitly here, since the PAM modules
875          * might have opened it, but we don't want this fd around. */
876         closelog();
877
878         *pam_env = e;
879         e = NULL;
880
881         return 0;
882
883 fail:
884         if (pam_code != PAM_SUCCESS) {
885                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
886                 err = -EPERM;  /* PAM errors do not map to errno */
887         } else {
888                 log_error("PAM failed: %m");
889                 err = -errno;
890         }
891
892         if (handle) {
893                 if (close_session)
894                         pam_code = pam_close_session(handle, flags);
895
896                 pam_end(handle, pam_code | flags);
897         }
898
899         strv_free(e);
900
901         closelog();
902
903         if (pam_pid > 1) {
904                 kill(pam_pid, SIGTERM);
905                 kill(pam_pid, SIGCONT);
906         }
907
908         return err;
909 }
910 #endif
911
/* Renames the current process to "(<name>)", where <name> is the last path
 * component of path, shortened to its final 8 characters if it is longer.
 * An empty component yields "(...)". */
static void rename_process_from_path(const char *path) {
        char process_name[11];
        const char *tail;
        size_t n;

        /* The result has to fit in 10 characters (the length of
         * "/sbin/init") so that it looks pretty in /bin/ps */

        tail = basename(path);
        if (isempty(tail)) {
                rename_process("(...)");
                return;
        }

        n = strlen(tail);
        if (n > 8) {
                /* Keep the end of the name: it is usually the more
                 * interesting part, as the beginning might just be
                 * "systemd-" */
                tail += n - 8;
                n = 8;
        }

        process_name[0] = '(';
        memcpy(process_name + 1, tail, n);
        process_name[n + 1] = ')';
        process_name[n + 2] = 0;

        rename_process(process_name);
}
942
943 #ifdef HAVE_SECCOMP
944
945 static int apply_seccomp(const ExecContext *c) {
946         uint32_t negative_action, action;
947         scmp_filter_ctx *seccomp;
948         Iterator i;
949         void *id;
950         int r;
951
952         assert(c);
953
954         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
955
956         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
957         if (!seccomp)
958                 return -ENOMEM;
959
960         if (c->syscall_archs) {
961
962                 SET_FOREACH(id, c->syscall_archs, i) {
963                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
964                         if (r == -EEXIST)
965                                 continue;
966                         if (r < 0)
967                                 goto finish;
968                 }
969
970         } else {
971                 r = seccomp_add_secondary_archs(seccomp);
972                 if (r < 0)
973                         goto finish;
974         }
975
976         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
977         SET_FOREACH(id, c->syscall_filter, i) {
978                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
979                 if (r < 0)
980                         goto finish;
981         }
982
983         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
984         if (r < 0)
985                 goto finish;
986
987         r = seccomp_load(seccomp);
988
989 finish:
990         seccomp_release(seccomp);
991         return r;
992 }
993
994 static int apply_address_families(const ExecContext *c) {
995         scmp_filter_ctx *seccomp;
996         Iterator i;
997         int r;
998
999         assert(c);
1000
1001         seccomp = seccomp_init(SCMP_ACT_ALLOW);
1002         if (!seccomp)
1003                 return -ENOMEM;
1004
1005         r = seccomp_add_secondary_archs(seccomp);
1006         if (r < 0)
1007                 goto finish;
1008
1009         if (c->address_families_whitelist) {
1010                 int af, first = 0, last = 0;
1011                 void *afp;
1012
1013                 /* If this is a whitelist, we first block the address
1014                  * families that are out of range and then everything
1015                  * that is not in the set. First, we find the lowest
1016                  * and highest address family in the set. */
1017
1018                 SET_FOREACH(afp, c->address_families, i) {
1019                         af = PTR_TO_INT(afp);
1020
1021                         if (af <= 0 || af >= af_max())
1022                                 continue;
1023
1024                         if (first == 0 || af < first)
1025                                 first = af;
1026
1027                         if (last == 0 || af > last)
1028                                 last = af;
1029                 }
1030
1031                 assert((first == 0) == (last == 0));
1032
1033                 if (first == 0) {
1034
1035                         /* No entries in the valid range, block everything */
1036                         r = seccomp_rule_add(
1037                                         seccomp,
1038                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1039                                         SCMP_SYS(socket),
1040                                         0);
1041                         if (r < 0)
1042                                 goto finish;
1043
1044                 } else {
1045
1046                         /* Block everything below the first entry */
1047                         r = seccomp_rule_add(
1048                                         seccomp,
1049                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1050                                         SCMP_SYS(socket),
1051                                         1,
1052                                         SCMP_A0(SCMP_CMP_LT, first));
1053                         if (r < 0)
1054                                 goto finish;
1055
1056                         /* Block everything above the last entry */
1057                         r = seccomp_rule_add(
1058                                         seccomp,
1059                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1060                                         SCMP_SYS(socket),
1061                                         1,
1062                                         SCMP_A0(SCMP_CMP_GT, last));
1063                         if (r < 0)
1064                                 goto finish;
1065
1066                         /* Block everything between the first and last
1067                          * entry */
1068                         for (af = 1; af < af_max(); af++) {
1069
1070                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1071                                         continue;
1072
1073                                 r = seccomp_rule_add(
1074                                                 seccomp,
1075                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076                                                 SCMP_SYS(socket),
1077                                                 1,
1078                                                 SCMP_A0(SCMP_CMP_EQ, af));
1079                                 if (r < 0)
1080                                         goto finish;
1081                         }
1082                 }
1083
1084         } else {
1085                 void *af;
1086
1087                 /* If this is a blacklist, then generate one rule for
1088                  * each address family that are then combined in OR
1089                  * checks. */
1090
1091                 SET_FOREACH(af, c->address_families, i) {
1092
1093                         r = seccomp_rule_add(
1094                                         seccomp,
1095                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1096                                         SCMP_SYS(socket),
1097                                         1,
1098                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1099                         if (r < 0)
1100                                 goto finish;
1101                 }
1102         }
1103
1104         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1105         if (r < 0)
1106                 goto finish;
1107
1108         r = seccomp_load(seccomp);
1109
1110 finish:
1111         seccomp_release(seccomp);
1112         return r;
1113 }
1114
1115 #endif
1116
1117 static void do_idle_pipe_dance(int idle_pipe[4]) {
1118         assert(idle_pipe);
1119
1120
1121         safe_close(idle_pipe[1]);
1122         safe_close(idle_pipe[2]);
1123
1124         if (idle_pipe[0] >= 0) {
1125                 int r;
1126
1127                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1128
1129                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1130                         /* Signal systemd that we are bored and want to continue. */
1131                         write(idle_pipe[3], "x", 1);
1132
1133                         /* Wait for systemd to react to the signal above. */
1134                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1135                 }
1136
1137                 safe_close(idle_pipe[0]);
1138
1139         }
1140
1141         safe_close(idle_pipe[3]);
1142 }
1143
1144 static int build_environment(
1145                 const ExecContext *c,
1146                 unsigned n_fds,
1147                 usec_t watchdog_usec,
1148                 const char *home,
1149                 const char *username,
1150                 const char *shell,
1151                 char ***ret) {
1152
1153         _cleanup_strv_free_ char **our_env = NULL;
1154         unsigned n_env = 0;
1155         char *x;
1156
1157         assert(c);
1158         assert(ret);
1159
1160         our_env = new0(char*, 10);
1161         if (!our_env)
1162                 return -ENOMEM;
1163
1164         if (n_fds > 0) {
1165                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1166                         return -ENOMEM;
1167                 our_env[n_env++] = x;
1168
1169                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1170                         return -ENOMEM;
1171                 our_env[n_env++] = x;
1172         }
1173
1174         if (watchdog_usec > 0) {
1175                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178
1179                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1180                         return -ENOMEM;
1181                 our_env[n_env++] = x;
1182         }
1183
1184         if (home) {
1185                 x = strappend("HOME=", home);
1186                 if (!x)
1187                         return -ENOMEM;
1188                 our_env[n_env++] = x;
1189         }
1190
1191         if (username) {
1192                 x = strappend("LOGNAME=", username);
1193                 if (!x)
1194                         return -ENOMEM;
1195                 our_env[n_env++] = x;
1196
1197                 x = strappend("USER=", username);
1198                 if (!x)
1199                         return -ENOMEM;
1200                 our_env[n_env++] = x;
1201         }
1202
1203         if (shell) {
1204                 x = strappend("SHELL=", shell);
1205                 if (!x)
1206                         return -ENOMEM;
1207                 our_env[n_env++] = x;
1208         }
1209
1210         if (is_terminal_input(c->std_input) ||
1211             c->std_output == EXEC_OUTPUT_TTY ||
1212             c->std_error == EXEC_OUTPUT_TTY ||
1213             c->tty_path) {
1214
1215                 x = strdup(default_term_for_tty(tty_path(c)));
1216                 if (!x)
1217                         return -ENOMEM;
1218                 our_env[n_env++] = x;
1219         }
1220
1221         our_env[n_env++] = NULL;
1222         assert(n_env <= 10);
1223
1224         *ret = our_env;
1225         our_env = NULL;
1226
1227         return 0;
1228 }
1229
1230 static int exec_child(ExecCommand *command,
1231                       const ExecContext *context,
1232                       const ExecParameters *params,
1233                       ExecRuntime *runtime,
1234                       char **argv,
1235                       int socket_fd,
1236                       int *fds, unsigned n_fds,
1237                       char **files_env,
1238                       int *error) {
1239
1240         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1241         const char *username = NULL, *home = NULL, *shell = NULL;
1242         unsigned n_dont_close = 0;
1243         int dont_close[n_fds + 4];
1244         uid_t uid = (uid_t) -1;
1245         gid_t gid = (gid_t) -1;
1246         int i, err;
1247
1248         assert(command);
1249         assert(context);
1250         assert(params);
1251         assert(error);
1252
1253         rename_process_from_path(command->path);
1254
1255         /* We reset exactly these signals, since they are the
1256          * only ones we set to SIG_IGN in the main daemon. All
1257          * others we leave untouched because we set them to
1258          * SIG_DFL or a valid handler initially, both of which
1259          * will be demoted to SIG_DFL. */
1260         default_signals(SIGNALS_CRASH_HANDLER,
1261                         SIGNALS_IGNORE, -1);
1262
1263         if (context->ignore_sigpipe)
1264                 ignore_signals(SIGPIPE, -1);
1265
1266         err = reset_signal_mask();
1267         if (err < 0) {
1268                 *error = EXIT_SIGNAL_MASK;
1269                 return err;
1270         }
1271
1272         if (params->idle_pipe)
1273                 do_idle_pipe_dance(params->idle_pipe);
1274
1275         /* Close sockets very early to make sure we don't
1276          * block init reexecution because it cannot bind its
1277          * sockets */
1278         log_forget_fds();
1279
1280         if (socket_fd >= 0)
1281                 dont_close[n_dont_close++] = socket_fd;
1282         if (n_fds > 0) {
1283                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1284                 n_dont_close += n_fds;
1285         }
1286         if (params->bus_endpoint_fd >= 0)
1287                 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1288         if (runtime) {
1289                 if (runtime->netns_storage_socket[0] >= 0)
1290                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1291                 if (runtime->netns_storage_socket[1] >= 0)
1292                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1293         }
1294
1295         err = close_all_fds(dont_close, n_dont_close);
1296         if (err < 0) {
1297                 *error = EXIT_FDS;
1298                 return err;
1299         }
1300
1301         if (!context->same_pgrp)
1302                 if (setsid() < 0) {
1303                         *error = EXIT_SETSID;
1304                         return -errno;
1305                 }
1306
1307         exec_context_tty_reset(context);
1308
1309         if (params->confirm_spawn) {
1310                 char response;
1311
1312                 err = ask_for_confirmation(&response, argv);
1313                 if (err == -ETIMEDOUT)
1314                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1315                 else if (err < 0)
1316                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1317                 else if (response == 's') {
1318                         write_confirm_message("Skipping execution.\n");
1319                         *error = EXIT_CONFIRM;
1320                         return -ECANCELED;
1321                 } else if (response == 'n') {
1322                         write_confirm_message("Failing execution.\n");
1323                         *error = 0;
1324                         return 0;
1325                 }
1326         }
1327
1328         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1329          * must sure to drop O_NONBLOCK */
1330         if (socket_fd >= 0)
1331                 fd_nonblock(socket_fd, false);
1332
1333         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1334         if (err < 0) {
1335                 *error = EXIT_STDIN;
1336                 return err;
1337         }
1338
1339         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1340         if (err < 0) {
1341                 *error = EXIT_STDOUT;
1342                 return err;
1343         }
1344
1345         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1346         if (err < 0) {
1347                 *error = EXIT_STDERR;
1348                 return err;
1349         }
1350
1351         if (params->cgroup_path) {
1352                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1353                 if (err < 0) {
1354                         *error = EXIT_CGROUP;
1355                         return err;
1356                 }
1357         }
1358
1359         if (context->oom_score_adjust_set) {
1360                 char t[16];
1361
1362                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1363                 char_array_0(t);
1364
1365                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1366                         *error = EXIT_OOM_ADJUST;
1367                         return -errno;
1368                 }
1369         }
1370
1371         if (context->nice_set)
1372                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1373                         *error = EXIT_NICE;
1374                         return -errno;
1375                 }
1376
1377         if (context->cpu_sched_set) {
1378                 struct sched_param param = {
1379                         .sched_priority = context->cpu_sched_priority,
1380                 };
1381
1382                 err = sched_setscheduler(0,
1383                                          context->cpu_sched_policy |
1384                                          (context->cpu_sched_reset_on_fork ?
1385                                           SCHED_RESET_ON_FORK : 0),
1386                                          &param);
1387                 if (err < 0) {
1388                         *error = EXIT_SETSCHEDULER;
1389                         return -errno;
1390                 }
1391         }
1392
1393         if (context->cpuset)
1394                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1395                         *error = EXIT_CPUAFFINITY;
1396                         return -errno;
1397                 }
1398
1399         if (context->ioprio_set)
1400                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1401                         *error = EXIT_IOPRIO;
1402                         return -errno;
1403                 }
1404
1405         if (context->timer_slack_nsec != NSEC_INFINITY)
1406                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1407                         *error = EXIT_TIMERSLACK;
1408                         return -errno;
1409                 }
1410
1411         if (context->personality != 0xffffffffUL)
1412                 if (personality(context->personality) < 0) {
1413                         *error = EXIT_PERSONALITY;
1414                         return -errno;
1415                 }
1416
1417         if (context->utmp_id)
1418                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1419
1420         if (context->user) {
1421                 username = context->user;
1422                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1423                 if (err < 0) {
1424                         *error = EXIT_USER;
1425                         return err;
1426                 }
1427
1428                 if (is_terminal_input(context->std_input)) {
1429                         err = chown_terminal(STDIN_FILENO, uid);
1430                         if (err < 0) {
1431                                 *error = EXIT_STDIN;
1432                                 return err;
1433                         }
1434                 }
1435         }
1436
1437 #ifdef ENABLE_KDBUS
1438         if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1439                 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1440
1441                 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1442                 if (err < 0) {
1443                         *error = EXIT_BUS_ENDPOINT;
1444                         return err;
1445                 }
1446         }
1447 #endif
1448
1449         /* If delegation is enabled we'll pass ownership of the cgroup
1450          * (but only in systemd's own controller hierarchy!) to the
1451          * user of the new process. */
1452         if (params->cgroup_path && context->user && params->cgroup_delegate) {
1453                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1454                 if (err < 0) {
1455                         *error = EXIT_CGROUP;
1456                         return err;
1457                 }
1458
1459
1460                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1461                 if (err < 0) {
1462                         *error = EXIT_CGROUP;
1463                         return err;
1464                 }
1465         }
1466
1467         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1468                 char **rt;
1469
1470                 STRV_FOREACH(rt, context->runtime_directory) {
1471                         _cleanup_free_ char *p;
1472
1473                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1474                         if (!p) {
1475                                 *error = EXIT_RUNTIME_DIRECTORY;
1476                                 return -ENOMEM;
1477                         }
1478
1479                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1480                         if (err < 0) {
1481                                 *error = EXIT_RUNTIME_DIRECTORY;
1482                                 return err;
1483                         }
1484                 }
1485         }
1486
1487         if (params->apply_permissions) {
1488                 err = enforce_groups(context, username, gid);
1489                 if (err < 0) {
1490                         *error = EXIT_GROUP;
1491                         return err;
1492                 }
1493         }
1494
1495         umask(context->umask);
1496
1497 #ifdef HAVE_PAM
1498         if (params->apply_permissions && context->pam_name && username) {
1499                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1500                 if (err < 0) {
1501                         *error = EXIT_PAM;
1502                         return err;
1503                 }
1504         }
1505 #endif
1506
1507         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1508                 err = setup_netns(runtime->netns_storage_socket);
1509                 if (err < 0) {
1510                         *error = EXIT_NETWORK;
1511                         return err;
1512                 }
1513         }
1514
1515         if (!strv_isempty(context->read_write_dirs) ||
1516             !strv_isempty(context->read_only_dirs) ||
1517             !strv_isempty(context->inaccessible_dirs) ||
1518             context->mount_flags != 0 ||
1519             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1520             params->bus_endpoint_path ||
1521             context->private_devices ||
1522             context->protect_system != PROTECT_SYSTEM_NO ||
1523             context->protect_home != PROTECT_HOME_NO) {
1524
1525                 char *tmp = NULL, *var = NULL;
1526
1527                 /* The runtime struct only contains the parent
1528                  * of the private /tmp, which is
1529                  * non-accessible to world users. Inside of it
1530                  * there's a /tmp that is sticky, and that's
1531                  * the one we want to use here. */
1532
1533                 if (context->private_tmp && runtime) {
1534                         if (runtime->tmp_dir)
1535                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1536                         if (runtime->var_tmp_dir)
1537                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1538                 }
1539
1540                 err = setup_namespace(
1541                                 context->read_write_dirs,
1542                                 context->read_only_dirs,
1543                                 context->inaccessible_dirs,
1544                                 tmp,
1545                                 var,
1546                                 params->bus_endpoint_path,
1547                                 context->private_devices,
1548                                 context->protect_home,
1549                                 context->protect_system,
1550                                 context->mount_flags);
1551
1552                 if (err == -EPERM)
1553                         log_unit_warning(params->unit_id, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %s", strerror(-err));
1554                 else if (err < 0) {
1555                         *error = EXIT_NAMESPACE;
1556                         return err;
1557                 }
1558         }
1559
1560         if (params->apply_chroot) {
1561                 if (context->root_directory)
1562                         if (chroot(context->root_directory) < 0) {
1563                                 *error = EXIT_CHROOT;
1564                                 return -errno;
1565                         }
1566
1567                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1568                         *error = EXIT_CHDIR;
1569                         return -errno;
1570                 }
1571         } else {
1572                 _cleanup_free_ char *d = NULL;
1573
1574                 if (asprintf(&d, "%s/%s",
1575                              context->root_directory ? context->root_directory : "",
1576                              context->working_directory ? context->working_directory : "") < 0) {
1577                         *error = EXIT_MEMORY;
1578                         return -ENOMEM;
1579                 }
1580
1581                 if (chdir(d) < 0) {
1582                         *error = EXIT_CHDIR;
1583                         return -errno;
1584                 }
1585         }
1586
1587         /* We repeat the fd closing here, to make sure that
1588          * nothing is leaked from the PAM modules. Note that
1589          * we are more aggressive this time since socket_fd
1590          * and the netns fds we don't need anymore. The custom
1591          * endpoint fd was needed to upload the policy and can
1592          * now be closed as well. */
1593         err = close_all_fds(fds, n_fds);
1594         if (err >= 0)
1595                 err = shift_fds(fds, n_fds);
1596         if (err >= 0)
1597                 err = flags_fds(fds, n_fds, context->non_blocking);
1598         if (err < 0) {
1599                 *error = EXIT_FDS;
1600                 return err;
1601         }
1602
1603         if (params->apply_permissions) {
1604
1605                 for (i = 0; i < _RLIMIT_MAX; i++) {
1606                         if (!context->rlimit[i])
1607                                 continue;
1608
1609                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1610                                 *error = EXIT_LIMITS;
1611                                 return -errno;
1612                         }
1613                 }
1614
1615                 if (context->capability_bounding_set_drop) {
1616                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1617                         if (err < 0) {
1618                                 *error = EXIT_CAPABILITIES;
1619                                 return err;
1620                         }
1621                 }
1622
1623 #ifdef HAVE_SMACK
1624                 if (context->smack_process_label) {
1625                         err = mac_smack_apply_pid(0, context->smack_process_label);
1626                         if (err < 0) {
1627                                 *error = EXIT_SMACK_PROCESS_LABEL;
1628                                 return err;
1629                         }
1630                 }
1631 #endif
1632
1633                 if (context->user) {
1634                         err = enforce_user(context, uid);
1635                         if (err < 0) {
1636                                 *error = EXIT_USER;
1637                                 return err;
1638                         }
1639                 }
1640
1641                 /* PR_GET_SECUREBITS is not privileged, while
1642                  * PR_SET_SECUREBITS is. So to suppress
1643                  * potential EPERMs we'll try not to call
1644                  * PR_SET_SECUREBITS unless necessary. */
1645                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1646                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1647                                 *error = EXIT_SECUREBITS;
1648                                 return -errno;
1649                         }
1650
1651                 if (context->capabilities)
1652                         if (cap_set_proc(context->capabilities) < 0) {
1653                                 *error = EXIT_CAPABILITIES;
1654                                 return -errno;
1655                         }
1656
1657                 if (context->no_new_privileges)
1658                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1659                                 *error = EXIT_NO_NEW_PRIVILEGES;
1660                                 return -errno;
1661                         }
1662
1663 #ifdef HAVE_SECCOMP
1664                 if (context->address_families_whitelist ||
1665                     !set_isempty(context->address_families)) {
1666                         err = apply_address_families(context);
1667                         if (err < 0) {
1668                                 *error = EXIT_ADDRESS_FAMILIES;
1669                                 return err;
1670                         }
1671                 }
1672
1673                 if (context->syscall_whitelist ||
1674                     !set_isempty(context->syscall_filter) ||
1675                     !set_isempty(context->syscall_archs)) {
1676                         err = apply_seccomp(context);
1677                         if (err < 0) {
1678                                 *error = EXIT_SECCOMP;
1679                                 return err;
1680                         }
1681                 }
1682 #endif
1683
1684 #ifdef HAVE_SELINUX
1685                 if (mac_selinux_use()) {
1686                         if (context->selinux_context) {
1687                                 err = setexeccon(context->selinux_context);
1688                                 if (err < 0 && !context->selinux_context_ignore) {
1689                                         *error = EXIT_SELINUX_CONTEXT;
1690                                         return err;
1691                                 }
1692                         }
1693
1694                         if (params->selinux_context_net && socket_fd >= 0) {
1695                                 _cleanup_free_ char *label = NULL;
1696
1697                                 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1698                                 if (err < 0) {
1699                                         *error = EXIT_SELINUX_CONTEXT;
1700                                         return err;
1701                                 }
1702
1703                                 err = setexeccon(label);
1704                                 if (err < 0) {
1705                                         *error = EXIT_SELINUX_CONTEXT;
1706                                         return err;
1707                                 }
1708                         }
1709                 }
1710 #endif
1711
1712 #ifdef HAVE_APPARMOR
1713                 if (context->apparmor_profile && mac_apparmor_use()) {
1714                         err = aa_change_onexec(context->apparmor_profile);
1715                         if (err < 0 && !context->apparmor_profile_ignore) {
1716                                 *error = EXIT_APPARMOR_PROFILE;
1717                                 return -errno;
1718                         }
1719                 }
1720 #endif
1721         }
1722
1723         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1724         if (err < 0) {
1725                 *error = EXIT_MEMORY;
1726                 return err;
1727         }
1728
1729         final_env = strv_env_merge(5,
1730                                    params->environment,
1731                                    our_env,
1732                                    context->environment,
1733                                    files_env,
1734                                    pam_env,
1735                                    NULL);
1736         if (!final_env) {
1737                 *error = EXIT_MEMORY;
1738                 return -ENOMEM;
1739         }
1740
1741         final_argv = replace_env_argv(argv, final_env);
1742         if (!final_argv) {
1743                 *error = EXIT_MEMORY;
1744                 return -ENOMEM;
1745         }
1746
1747         final_env = strv_env_clean(final_env);
1748
1749         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1750                 _cleanup_free_ char *line;
1751
1752                 line = exec_command_line(final_argv);
1753                 if (line) {
1754                         log_open();
1755                         log_unit_struct(params->unit_id,
1756                                         LOG_DEBUG,
1757                                         "EXECUTABLE=%s", command->path,
1758                                         LOG_MESSAGE("Executing: %s", line),
1759                                         NULL);
1760                         log_close();
1761                 }
1762         }
1763         execve(command->path, final_argv, final_env);
1764         *error = EXIT_EXEC;
1765         return -errno;
1766 }
1767
1768 int exec_spawn(ExecCommand *command,
1769                const ExecContext *context,
1770                const ExecParameters *params,
1771                ExecRuntime *runtime,
1772                pid_t *ret) {
1773
1774         _cleanup_strv_free_ char **files_env = NULL;
1775         int *fds = NULL; unsigned n_fds = 0;
1776         char *line, **argv;
1777         int socket_fd;
1778         pid_t pid;
1779         int err;
1780
1781         assert(command);
1782         assert(context);
1783         assert(ret);
1784         assert(params);
1785         assert(params->fds || params->n_fds <= 0);
1786
1787         if (context->std_input == EXEC_INPUT_SOCKET ||
1788             context->std_output == EXEC_OUTPUT_SOCKET ||
1789             context->std_error == EXEC_OUTPUT_SOCKET) {
1790
1791                 if (params->n_fds != 1)
1792                         return -EINVAL;
1793
1794                 socket_fd = params->fds[0];
1795         } else {
1796                 socket_fd = -1;
1797                 fds = params->fds;
1798                 n_fds = params->n_fds;
1799         }
1800
1801         err = exec_context_load_environment(context, params->unit_id, &files_env);
1802         if (err < 0) {
1803                 log_unit_struct(params->unit_id,
1804                                 LOG_ERR,
1805                                 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1806                                 LOG_ERRNO(-err),
1807                                 NULL);
1808                 return err;
1809         }
1810
1811         argv = params->argv ?: command->argv;
1812
1813         line = exec_command_line(argv);
1814         if (!line)
1815                 return log_oom();
1816
1817         log_unit_struct(params->unit_id,
1818                         LOG_DEBUG,
1819                         "EXECUTABLE=%s", command->path,
1820                         LOG_MESSAGE("About to execute: %s", line),
1821                         NULL);
1822         free(line);
1823
1824         pid = fork();
1825         if (pid < 0)
1826                 return -errno;
1827
1828         if (pid == 0) {
1829                 int r;
1830
1831                 err = exec_child(command,
1832                                  context,
1833                                  params,
1834                                  runtime,
1835                                  argv,
1836                                  socket_fd,
1837                                  fds, n_fds,
1838                                  files_env,
1839                                  &r);
1840                 if (r != 0) {
1841                         log_open();
1842                         log_struct(LOG_ERR,
1843                                    LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1844                                    "EXECUTABLE=%s", command->path,
1845                                    LOG_MESSAGE("Failed at step %s spawning %s: %s",
1846                                                exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1847                                                command->path, strerror(-err)),
1848                                    LOG_ERRNO(-err),
1849                                    NULL);
1850                         log_close();
1851                 }
1852
1853                 _exit(r);
1854         }
1855
1856         log_unit_struct(params->unit_id,
1857                         LOG_DEBUG,
1858                         LOG_MESSAGE("Forked %s as "PID_FMT,
1859                                     command->path, pid),
1860                         NULL);
1861
1862         /* We add the new process to the cgroup both in the child (so
1863          * that we can be sure that no user code is ever executed
1864          * outside of the cgroup) and in the parent (so that we can be
1865          * sure that when we kill the cgroup the process will be
1866          * killed too). */
1867         if (params->cgroup_path)
1868                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1869
1870         exec_status_start(&command->exec_status, pid);
1871
1872         *ret = pid;
1873         return 0;
1874 }
1875
1876 void exec_context_init(ExecContext *c) {
1877         assert(c);
1878
1879         c->umask = 0022;
1880         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1881         c->cpu_sched_policy = SCHED_OTHER;
1882         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1883         c->syslog_level_prefix = true;
1884         c->ignore_sigpipe = true;
1885         c->timer_slack_nsec = NSEC_INFINITY;
1886         c->personality = 0xffffffffUL;
1887         c->runtime_directory_mode = 0755;
1888 }
1889
1890 void exec_context_done(ExecContext *c) {
1891         unsigned l;
1892
1893         assert(c);
1894
1895         strv_free(c->environment);
1896         c->environment = NULL;
1897
1898         strv_free(c->environment_files);
1899         c->environment_files = NULL;
1900
1901         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1902                 free(c->rlimit[l]);
1903                 c->rlimit[l] = NULL;
1904         }
1905
1906         free(c->working_directory);
1907         c->working_directory = NULL;
1908         free(c->root_directory);
1909         c->root_directory = NULL;
1910
1911         free(c->tty_path);
1912         c->tty_path = NULL;
1913
1914         free(c->syslog_identifier);
1915         c->syslog_identifier = NULL;
1916
1917         free(c->user);
1918         c->user = NULL;
1919
1920         free(c->group);
1921         c->group = NULL;
1922
1923         strv_free(c->supplementary_groups);
1924         c->supplementary_groups = NULL;
1925
1926         free(c->pam_name);
1927         c->pam_name = NULL;
1928
1929         if (c->capabilities) {
1930                 cap_free(c->capabilities);
1931                 c->capabilities = NULL;
1932         }
1933
1934         strv_free(c->read_only_dirs);
1935         c->read_only_dirs = NULL;
1936
1937         strv_free(c->read_write_dirs);
1938         c->read_write_dirs = NULL;
1939
1940         strv_free(c->inaccessible_dirs);
1941         c->inaccessible_dirs = NULL;
1942
1943         if (c->cpuset)
1944                 CPU_FREE(c->cpuset);
1945
1946         free(c->utmp_id);
1947         c->utmp_id = NULL;
1948
1949         free(c->selinux_context);
1950         c->selinux_context = NULL;
1951
1952         free(c->apparmor_profile);
1953         c->apparmor_profile = NULL;
1954
1955         set_free(c->syscall_filter);
1956         c->syscall_filter = NULL;
1957
1958         set_free(c->syscall_archs);
1959         c->syscall_archs = NULL;
1960
1961         set_free(c->address_families);
1962         c->address_families = NULL;
1963
1964         strv_free(c->runtime_directory);
1965         c->runtime_directory = NULL;
1966
1967         bus_endpoint_free(c->bus_endpoint);
1968         c->bus_endpoint = NULL;
1969 }
1970
1971 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1972         char **i;
1973
1974         assert(c);
1975
1976         if (!runtime_prefix)
1977                 return 0;
1978
1979         STRV_FOREACH(i, c->runtime_directory) {
1980                 _cleanup_free_ char *p;
1981
1982                 p = strjoin(runtime_prefix, "/", *i, NULL);
1983                 if (!p)
1984                         return -ENOMEM;
1985
1986                 /* We execute this synchronously, since we need to be
1987                  * sure this is gone when we start the service
1988                  * next. */
1989                 rm_rf_dangerous(p, false, true, false);
1990         }
1991
1992         return 0;
1993 }
1994
1995 void exec_command_done(ExecCommand *c) {
1996         assert(c);
1997
1998         free(c->path);
1999         c->path = NULL;
2000
2001         strv_free(c->argv);
2002         c->argv = NULL;
2003 }
2004
2005 void exec_command_done_array(ExecCommand *c, unsigned n) {
2006         unsigned i;
2007
2008         for (i = 0; i < n; i++)
2009                 exec_command_done(c+i);
2010 }
2011
2012 void exec_command_free_list(ExecCommand *c) {
2013         ExecCommand *i;
2014
2015         while ((i = c)) {
2016                 LIST_REMOVE(command, c, i);
2017                 exec_command_done(i);
2018                 free(i);
2019         }
2020 }
2021
2022 void exec_command_free_array(ExecCommand **c, unsigned n) {
2023         unsigned i;
2024
2025         for (i = 0; i < n; i++) {
2026                 exec_command_free_list(c[i]);
2027                 c[i] = NULL;
2028         }
2029 }
2030
2031 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2032         char **i, **r = NULL;
2033
2034         assert(c);
2035         assert(l);
2036
2037         STRV_FOREACH(i, c->environment_files) {
2038                 char *fn;
2039                 int k;
2040                 bool ignore = false;
2041                 char **p;
2042                 _cleanup_globfree_ glob_t pglob = {};
2043                 int count, n;
2044
2045                 fn = *i;
2046
2047                 if (fn[0] == '-') {
2048                         ignore = true;
2049                         fn ++;
2050                 }
2051
2052                 if (!path_is_absolute(fn)) {
2053                         if (ignore)
2054                                 continue;
2055
2056                         strv_free(r);
2057                         return -EINVAL;
2058                 }
2059
2060                 /* Filename supports globbing, take all matching files */
2061                 errno = 0;
2062                 if (glob(fn, 0, NULL, &pglob) != 0) {
2063                         if (ignore)
2064                                 continue;
2065
2066                         strv_free(r);
2067                         return errno ? -errno : -EINVAL;
2068                 }
2069                 count = pglob.gl_pathc;
2070                 if (count == 0) {
2071                         if (ignore)
2072                                 continue;
2073
2074                         strv_free(r);
2075                         return -EINVAL;
2076                 }
2077                 for (n = 0; n < count; n++) {
2078                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2079                         if (k < 0) {
2080                                 if (ignore)
2081                                         continue;
2082
2083                                 strv_free(r);
2084                                 return k;
2085                         }
2086                         /* Log invalid environment variables with filename */
2087                         if (p)
2088                                 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2089
2090                         if (r == NULL)
2091                                 r = p;
2092                         else {
2093                                 char **m;
2094
2095                                 m = strv_env_merge(2, r, p);
2096                                 strv_free(r);
2097                                 strv_free(p);
2098                                 if (!m)
2099                                         return -ENOMEM;
2100
2101                                 r = m;
2102                         }
2103                 }
2104         }
2105
2106         *l = r;
2107
2108         return 0;
2109 }
2110
2111 static bool tty_may_match_dev_console(const char *tty) {
2112         _cleanup_free_ char *active = NULL;
2113        char *console;
2114
2115         if (startswith(tty, "/dev/"))
2116                 tty += 5;
2117
2118         /* trivial identity? */
2119         if (streq(tty, "console"))
2120                 return true;
2121
2122         console = resolve_dev_console(&active);
2123         /* if we could not resolve, assume it may */
2124         if (!console)
2125                 return true;
2126
2127         /* "tty0" means the active VC, so it may be the same sometimes */
2128         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2129 }
2130
2131 bool exec_context_may_touch_console(ExecContext *ec) {
2132         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2133                 is_terminal_input(ec->std_input) ||
2134                 is_terminal_output(ec->std_output) ||
2135                 is_terminal_output(ec->std_error)) &&
2136                tty_may_match_dev_console(tty_path(ec));
2137 }
2138
2139 static void strv_fprintf(FILE *f, char **l) {
2140         char **g;
2141
2142         assert(f);
2143
2144         STRV_FOREACH(g, l)
2145                 fprintf(f, " %s", *g);
2146 }
2147
2148 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2149         char **e;
2150         unsigned i;
2151
2152         assert(c);
2153         assert(f);
2154
2155         prefix = strempty(prefix);
2156
2157         fprintf(f,
2158                 "%sUMask: %04o\n"
2159                 "%sWorkingDirectory: %s\n"
2160                 "%sRootDirectory: %s\n"
2161                 "%sNonBlocking: %s\n"
2162                 "%sPrivateTmp: %s\n"
2163                 "%sPrivateNetwork: %s\n"
2164                 "%sPrivateDevices: %s\n"
2165                 "%sProtectHome: %s\n"
2166                 "%sProtectSystem: %s\n"
2167                 "%sIgnoreSIGPIPE: %s\n",
2168                 prefix, c->umask,
2169                 prefix, c->working_directory ? c->working_directory : "/",
2170                 prefix, c->root_directory ? c->root_directory : "/",
2171                 prefix, yes_no(c->non_blocking),
2172                 prefix, yes_no(c->private_tmp),
2173                 prefix, yes_no(c->private_network),
2174                 prefix, yes_no(c->private_devices),
2175                 prefix, protect_home_to_string(c->protect_home),
2176                 prefix, protect_system_to_string(c->protect_system),
2177                 prefix, yes_no(c->ignore_sigpipe));
2178
2179         STRV_FOREACH(e, c->environment)
2180                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2181
2182         STRV_FOREACH(e, c->environment_files)
2183                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2184
2185         if (c->nice_set)
2186                 fprintf(f,
2187                         "%sNice: %i\n",
2188                         prefix, c->nice);
2189
2190         if (c->oom_score_adjust_set)
2191                 fprintf(f,
2192                         "%sOOMScoreAdjust: %i\n",
2193                         prefix, c->oom_score_adjust);
2194
2195         for (i = 0; i < RLIM_NLIMITS; i++)
2196                 if (c->rlimit[i])
2197                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2198                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2199
2200         if (c->ioprio_set) {
2201                 _cleanup_free_ char *class_str = NULL;
2202
2203                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2204                 fprintf(f,
2205                         "%sIOSchedulingClass: %s\n"
2206                         "%sIOPriority: %i\n",
2207                         prefix, strna(class_str),
2208                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2209         }
2210
2211         if (c->cpu_sched_set) {
2212                 _cleanup_free_ char *policy_str = NULL;
2213
2214                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2215                 fprintf(f,
2216                         "%sCPUSchedulingPolicy: %s\n"
2217                         "%sCPUSchedulingPriority: %i\n"
2218                         "%sCPUSchedulingResetOnFork: %s\n",
2219                         prefix, strna(policy_str),
2220                         prefix, c->cpu_sched_priority,
2221                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2222         }
2223
2224         if (c->cpuset) {
2225                 fprintf(f, "%sCPUAffinity:", prefix);
2226                 for (i = 0; i < c->cpuset_ncpus; i++)
2227                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2228                                 fprintf(f, " %u", i);
2229                 fputs("\n", f);
2230         }
2231
2232         if (c->timer_slack_nsec != NSEC_INFINITY)
2233                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2234
2235         fprintf(f,
2236                 "%sStandardInput: %s\n"
2237                 "%sStandardOutput: %s\n"
2238                 "%sStandardError: %s\n",
2239                 prefix, exec_input_to_string(c->std_input),
2240                 prefix, exec_output_to_string(c->std_output),
2241                 prefix, exec_output_to_string(c->std_error));
2242
2243         if (c->tty_path)
2244                 fprintf(f,
2245                         "%sTTYPath: %s\n"
2246                         "%sTTYReset: %s\n"
2247                         "%sTTYVHangup: %s\n"
2248                         "%sTTYVTDisallocate: %s\n",
2249                         prefix, c->tty_path,
2250                         prefix, yes_no(c->tty_reset),
2251                         prefix, yes_no(c->tty_vhangup),
2252                         prefix, yes_no(c->tty_vt_disallocate));
2253
2254         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2255             c->std_output == EXEC_OUTPUT_KMSG ||
2256             c->std_output == EXEC_OUTPUT_JOURNAL ||
2257             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2258             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2259             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2260             c->std_error == EXEC_OUTPUT_SYSLOG ||
2261             c->std_error == EXEC_OUTPUT_KMSG ||
2262             c->std_error == EXEC_OUTPUT_JOURNAL ||
2263             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2264             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2265             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2266
2267                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2268
2269                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2270                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2271
2272                 fprintf(f,
2273                         "%sSyslogFacility: %s\n"
2274                         "%sSyslogLevel: %s\n",
2275                         prefix, strna(fac_str),
2276                         prefix, strna(lvl_str));
2277         }
2278
2279         if (c->capabilities) {
2280                 _cleanup_cap_free_charp_ char *t;
2281
2282                 t = cap_to_text(c->capabilities, NULL);
2283                 if (t)
2284                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2285         }
2286
2287         if (c->secure_bits)
2288                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2289                         prefix,
2290                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2291                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2292                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2293                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2294                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2295                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2296
2297         if (c->capability_bounding_set_drop) {
2298                 unsigned long l;
2299                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2300
2301                 for (l = 0; l <= cap_last_cap(); l++)
2302                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2303                                 _cleanup_cap_free_charp_ char *t;
2304
2305                                 t = cap_to_name(l);
2306                                 if (t)
2307                                         fprintf(f, " %s", t);
2308                         }
2309
2310                 fputs("\n", f);
2311         }
2312
2313         if (c->user)
2314                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2315         if (c->group)
2316                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2317
2318         if (strv_length(c->supplementary_groups) > 0) {
2319                 fprintf(f, "%sSupplementaryGroups:", prefix);
2320                 strv_fprintf(f, c->supplementary_groups);
2321                 fputs("\n", f);
2322         }
2323
2324         if (c->pam_name)
2325                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2326
2327         if (strv_length(c->read_write_dirs) > 0) {
2328                 fprintf(f, "%sReadWriteDirs:", prefix);
2329                 strv_fprintf(f, c->read_write_dirs);
2330                 fputs("\n", f);
2331         }
2332
2333         if (strv_length(c->read_only_dirs) > 0) {
2334                 fprintf(f, "%sReadOnlyDirs:", prefix);
2335                 strv_fprintf(f, c->read_only_dirs);
2336                 fputs("\n", f);
2337         }
2338
2339         if (strv_length(c->inaccessible_dirs) > 0) {
2340                 fprintf(f, "%sInaccessibleDirs:", prefix);
2341                 strv_fprintf(f, c->inaccessible_dirs);
2342                 fputs("\n", f);
2343         }
2344
2345         if (c->utmp_id)
2346                 fprintf(f,
2347                         "%sUtmpIdentifier: %s\n",
2348                         prefix, c->utmp_id);
2349
2350         if (c->selinux_context)
2351                 fprintf(f,
2352                         "%sSELinuxContext: %s%s\n",
2353                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2354
2355         if (c->personality != 0xffffffffUL)
2356                 fprintf(f,
2357                         "%sPersonality: %s\n",
2358                         prefix, strna(personality_to_string(c->personality)));
2359
2360         if (c->syscall_filter) {
2361 #ifdef HAVE_SECCOMP
2362                 Iterator j;
2363                 void *id;
2364                 bool first = true;
2365 #endif
2366
2367                 fprintf(f,
2368                         "%sSystemCallFilter: ",
2369                         prefix);
2370
2371                 if (!c->syscall_whitelist)
2372                         fputc('~', f);
2373
2374 #ifdef HAVE_SECCOMP
2375                 SET_FOREACH(id, c->syscall_filter, j) {
2376                         _cleanup_free_ char *name = NULL;
2377
2378                         if (first)
2379                                 first = false;
2380                         else
2381                                 fputc(' ', f);
2382
2383                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2384                         fputs(strna(name), f);
2385                 }
2386 #endif
2387
2388                 fputc('\n', f);
2389         }
2390
2391         if (c->syscall_archs) {
2392 #ifdef HAVE_SECCOMP
2393                 Iterator j;
2394                 void *id;
2395 #endif
2396
2397                 fprintf(f,
2398                         "%sSystemCallArchitectures:",
2399                         prefix);
2400
2401 #ifdef HAVE_SECCOMP
2402                 SET_FOREACH(id, c->syscall_archs, j)
2403                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2404 #endif
2405                 fputc('\n', f);
2406         }
2407
2408         if (c->syscall_errno != 0)
2409                 fprintf(f,
2410                         "%sSystemCallErrorNumber: %s\n",
2411                         prefix, strna(errno_to_name(c->syscall_errno)));
2412
2413         if (c->apparmor_profile)
2414                 fprintf(f,
2415                         "%sAppArmorProfile: %s%s\n",
2416                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2417 }
2418
2419 bool exec_context_maintains_privileges(ExecContext *c) {
2420         assert(c);
2421
2422         /* Returns true if the process forked off would run run under
2423          * an unchanged UID or as root. */
2424
2425         if (!c->user)
2426                 return true;
2427
2428         if (streq(c->user, "root") || streq(c->user, "0"))
2429                 return true;
2430
2431         return false;
2432 }
2433
2434 void exec_status_start(ExecStatus *s, pid_t pid) {
2435         assert(s);
2436
2437         zero(*s);
2438         s->pid = pid;
2439         dual_timestamp_get(&s->start_timestamp);
2440 }
2441
2442 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2443         assert(s);
2444
2445         if (s->pid && s->pid != pid)
2446                 zero(*s);
2447
2448         s->pid = pid;
2449         dual_timestamp_get(&s->exit_timestamp);
2450
2451         s->code = code;
2452         s->status = status;
2453
2454         if (context) {
2455                 if (context->utmp_id)
2456                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2457
2458                 exec_context_tty_reset(context);
2459         }
2460 }
2461
2462 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2463         char buf[FORMAT_TIMESTAMP_MAX];
2464
2465         assert(s);
2466         assert(f);
2467
2468         if (s->pid <= 0)
2469                 return;
2470
2471         prefix = strempty(prefix);
2472
2473         fprintf(f,
2474                 "%sPID: "PID_FMT"\n",
2475                 prefix, s->pid);
2476
2477         if (s->start_timestamp.realtime > 0)
2478                 fprintf(f,
2479                         "%sStart Timestamp: %s\n",
2480                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2481
2482         if (s->exit_timestamp.realtime > 0)
2483                 fprintf(f,
2484                         "%sExit Timestamp: %s\n"
2485                         "%sExit Code: %s\n"
2486                         "%sExit Status: %i\n",
2487                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2488                         prefix, sigchld_code_to_string(s->code),
2489                         prefix, s->status);
2490 }
2491
2492 char *exec_command_line(char **argv) {
2493         size_t k;
2494         char *n, *p, **a;
2495         bool first = true;
2496
2497         assert(argv);
2498
2499         k = 1;
2500         STRV_FOREACH(a, argv)
2501                 k += strlen(*a)+3;
2502
2503         if (!(n = new(char, k)))
2504                 return NULL;
2505
2506         p = n;
2507         STRV_FOREACH(a, argv) {
2508
2509                 if (!first)
2510                         *(p++) = ' ';
2511                 else
2512                         first = false;
2513
2514                 if (strpbrk(*a, WHITESPACE)) {
2515                         *(p++) = '\'';
2516                         p = stpcpy(p, *a);
2517                         *(p++) = '\'';
2518                 } else
2519                         p = stpcpy(p, *a);
2520
2521         }
2522
2523         *p = 0;
2524
2525         /* FIXME: this doesn't really handle arguments that have
2526          * spaces and ticks in them */
2527
2528         return n;
2529 }
2530
2531 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2532         _cleanup_free_ char *cmd = NULL;
2533         const char *prefix2;
2534
2535         assert(c);
2536         assert(f);
2537
2538         prefix = strempty(prefix);
2539         prefix2 = strappenda(prefix, "\t");
2540
2541         cmd = exec_command_line(c->argv);
2542         fprintf(f,
2543                 "%sCommand Line: %s\n",
2544                 prefix, cmd ? cmd : strerror(ENOMEM));
2545
2546         exec_status_dump(&c->exec_status, f, prefix2);
2547 }
2548
2549 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2550         assert(f);
2551
2552         prefix = strempty(prefix);
2553
2554         LIST_FOREACH(command, c, c)
2555                 exec_command_dump(c, f, prefix);
2556 }
2557
2558 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2559         ExecCommand *end;
2560
2561         assert(l);
2562         assert(e);
2563
2564         if (*l) {
2565                 /* It's kind of important, that we keep the order here */
2566                 LIST_FIND_TAIL(command, *l, end);
2567                 LIST_INSERT_AFTER(command, *l, end, e);
2568         } else
2569               *l = e;
2570 }
2571
2572 int exec_command_set(ExecCommand *c, const char *path, ...) {
2573         va_list ap;
2574         char **l, *p;
2575
2576         assert(c);
2577         assert(path);
2578
2579         va_start(ap, path);
2580         l = strv_new_ap(path, ap);
2581         va_end(ap);
2582
2583         if (!l)
2584                 return -ENOMEM;
2585
2586         p = strdup(path);
2587         if (!p) {
2588                 strv_free(l);
2589                 return -ENOMEM;
2590         }
2591
2592         free(c->path);
2593         c->path = p;
2594
2595         strv_free(c->argv);
2596         c->argv = l;
2597
2598         return 0;
2599 }
2600
2601 int exec_command_append(ExecCommand *c, const char *path, ...) {
2602         _cleanup_strv_free_ char **l = NULL;
2603         va_list ap;
2604         int r;
2605
2606         assert(c);
2607         assert(path);
2608
2609         va_start(ap, path);
2610         l = strv_new_ap(path, ap);
2611         va_end(ap);
2612
2613         if (!l)
2614                 return -ENOMEM;
2615
2616         r = strv_extend_strv(&c->argv, l);
2617         if (r < 0)
2618                 return r;
2619
2620         return 0;
2621 }
2622
2623
2624 static int exec_runtime_allocate(ExecRuntime **rt) {
2625
2626         if (*rt)
2627                 return 0;
2628
2629         *rt = new0(ExecRuntime, 1);
2630         if (!*rt)
2631                 return -ENOMEM;
2632
2633         (*rt)->n_ref = 1;
2634         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2635
2636         return 0;
2637 }
2638
2639 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2640         int r;
2641
2642         assert(rt);
2643         assert(c);
2644         assert(id);
2645
2646         if (*rt)
2647                 return 1;
2648
2649         if (!c->private_network && !c->private_tmp)
2650                 return 0;
2651
2652         r = exec_runtime_allocate(rt);
2653         if (r < 0)
2654                 return r;
2655
2656         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2657                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2658                         return -errno;
2659         }
2660
2661         if (c->private_tmp && !(*rt)->tmp_dir) {
2662                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2663                 if (r < 0)
2664                         return r;
2665         }
2666
2667         return 1;
2668 }
2669
2670 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2671         assert(r);
2672         assert(r->n_ref > 0);
2673
2674         r->n_ref++;
2675         return r;
2676 }
2677
2678 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2679
2680         if (!r)
2681                 return NULL;
2682
2683         assert(r->n_ref > 0);
2684
2685         r->n_ref--;
2686         if (r->n_ref <= 0) {
2687                 free(r->tmp_dir);
2688                 free(r->var_tmp_dir);
2689                 safe_close_pair(r->netns_storage_socket);
2690                 free(r);
2691         }
2692
2693         return NULL;
2694 }
2695
2696 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2697         assert(u);
2698         assert(f);
2699         assert(fds);
2700
2701         if (!rt)
2702                 return 0;
2703
2704         if (rt->tmp_dir)
2705                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2706
2707         if (rt->var_tmp_dir)
2708                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2709
2710         if (rt->netns_storage_socket[0] >= 0) {
2711                 int copy;
2712
2713                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2714                 if (copy < 0)
2715                         return copy;
2716
2717                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2718         }
2719
2720         if (rt->netns_storage_socket[1] >= 0) {
2721                 int copy;
2722
2723                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2724                 if (copy < 0)
2725                         return copy;
2726
2727                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2728         }
2729
2730         return 0;
2731 }
2732
2733 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2734         int r;
2735
2736         assert(rt);
2737         assert(key);
2738         assert(value);
2739
2740         if (streq(key, "tmp-dir")) {
2741                 char *copy;
2742
2743                 r = exec_runtime_allocate(rt);
2744                 if (r < 0)
2745                         return r;
2746
2747                 copy = strdup(value);
2748                 if (!copy)
2749                         return log_oom();
2750
2751                 free((*rt)->tmp_dir);
2752                 (*rt)->tmp_dir = copy;
2753
2754         } else if (streq(key, "var-tmp-dir")) {
2755                 char *copy;
2756
2757                 r = exec_runtime_allocate(rt);
2758                 if (r < 0)
2759                         return r;
2760
2761                 copy = strdup(value);
2762                 if (!copy)
2763                         return log_oom();
2764
2765                 free((*rt)->var_tmp_dir);
2766                 (*rt)->var_tmp_dir = copy;
2767
2768         } else if (streq(key, "netns-socket-0")) {
2769                 int fd;
2770
2771                 r = exec_runtime_allocate(rt);
2772                 if (r < 0)
2773                         return r;
2774
2775                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2776                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2777                 else {
2778                         safe_close((*rt)->netns_storage_socket[0]);
2779                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2780                 }
2781         } else if (streq(key, "netns-socket-1")) {
2782                 int fd;
2783
2784                 r = exec_runtime_allocate(rt);
2785                 if (r < 0)
2786                         return r;
2787
2788                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2789                         log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2790                 else {
2791                         safe_close((*rt)->netns_storage_socket[1]);
2792                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2793                 }
2794         } else
2795                 return 0;
2796
2797         return 1;
2798 }
2799
/* Job function handed to asynchronous_job(): p is a heap-allocated path
 * string. The path is passed to rm_rf_dangerous() and the string is
 * freed afterwards, i.e. this function takes ownership of p. Always
 * returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *victim = p;

        rm_rf_dangerous(victim, false, true, false);
        free(victim);

        return NULL;
}
2806
2807 void exec_runtime_destroy(ExecRuntime *rt) {
2808         int r;
2809
2810         if (!rt)
2811                 return;
2812
2813         /* If there are multiple users of this, let's leave the stuff around */
2814         if (rt->n_ref > 1)
2815                 return;
2816
2817         if (rt->tmp_dir) {
2818                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2819
2820                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2821                 if (r < 0) {
2822                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2823                         free(rt->tmp_dir);
2824                 }
2825
2826                 rt->tmp_dir = NULL;
2827         }
2828
2829         if (rt->var_tmp_dir) {
2830                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2831
2832                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2833                 if (r < 0) {
2834                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2835                         free(rt->var_tmp_dir);
2836                 }
2837
2838                 rt->var_tmp_dir = NULL;
2839         }
2840
2841         safe_close_pair(rt->netns_storage_socket);
2842 }
2843
/* Textual names of the ExecInput values, indexed by enum constant. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably expands to the string <-> ExecInput conversion
 * helpers backed by exec_input_table; the macro is defined outside this
 * file section -- confirm. */
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2853
/* Textual names of the ExecOutput values, indexed by enum constant. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket"
};

/* NOTE(review): presumably expands to the string <-> ExecOutput conversion
 * helpers backed by exec_output_table; the macro is defined outside this
 * file section -- confirm. */
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);