chiark / gitweb /
exit-status: add new exit code for custom endpoint errors
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <sys/personality.h>
43 #include <libgen.h>
44 #undef basename
45
46 #ifdef HAVE_PAM
47 #include <security/pam_appl.h>
48 #endif
49
50 #ifdef HAVE_SELINUX
51 #include <selinux/selinux.h>
52 #endif
53
54 #ifdef HAVE_SECCOMP
55 #include <seccomp.h>
56 #endif
57
58 #ifdef HAVE_APPARMOR
59 #include <sys/apparmor.h>
60 #endif
61
62 #include "execute.h"
63 #include "strv.h"
64 #include "macro.h"
65 #include "capability.h"
66 #include "util.h"
67 #include "log.h"
68 #include "sd-messages.h"
69 #include "ioprio.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
73 #include "missing.h"
74 #include "utmp-wtmp.h"
75 #include "def.h"
76 #include "path-util.h"
77 #include "env-util.h"
78 #include "fileio.h"
79 #include "unit.h"
80 #include "async.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
83 #include "af-list.h"
84 #include "mkdir.h"
85 #include "apparmor-util.h"
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
/* NOTE(review): neither constant is referenced in this part of the
 * file; presumably these are two timeouts, expressed in microseconds
 * via USEC_PER_SEC - confirm against the users further down. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() applies to the terminal and afterwards
 * verifies. This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Value handed to fd_inc_sndbuf() for the journal stdout connection
 * set up in connect_logger_as() */
#define SNDBUF_SIZE (8*1024*1024)
98
/* Relocates the passed file descriptors so that fds[0] ends up as fd 3,
 * fds[1] as fd 4, and so on. The array is rewritten in place with the
 * new descriptor numbers. Returns 0 on success, or a negative
 * errno-style value if duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first, retry_at;

        if (n_fds <= 0)
                return 0;

        /* Note: the caller's array is modified! */

        assert(fds);

        first = 0;
        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we asked for was still taken, so we
                         * were handed a different fd. Note down the
                         * first index where that happened so the next
                         * pass starts over from there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
142
/* Applies the requested O_NONBLOCK setting to every passed fd and
 * clears FD_CLOEXEC on each of them. Returns 0 on success or the first
 * negative error reported by fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always turned off: these fds are
                 * meant to be handed to our children. */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
169
170 _pure_ static const char *tty_path(const ExecContext *context) {
171         assert(context);
172
173         if (context->tty_path)
174                 return context->tty_path;
175
176         return "/dev/console";
177 }
178
179 static void exec_context_tty_reset(const ExecContext *context) {
180         assert(context);
181
182         if (context->tty_vhangup)
183                 terminal_vhangup(tty_path(context));
184
185         if (context->tty_reset)
186                 reset_terminal(tty_path(context));
187
188         if (context->tty_vt_disallocate && context->tty_path)
189                 vt_disallocate(context->tty_path);
190 }
191
192 static bool is_terminal_output(ExecOutput o) {
193         return
194                 o == EXEC_OUTPUT_TTY ||
195                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
198 }
199
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd. Returns nfd on success, a negative
 * errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel already handed us the wanted number */
        if (null_fd == nfd)
                return nfd;

        ret = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        safe_close(null_fd);

        return ret;
}
217
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
219         int fd, r;
220         union sockaddr_union sa = {
221                 .un.sun_family = AF_UNIX,
222                 .un.sun_path = "/run/systemd/journal/stdout",
223         };
224
225         assert(context);
226         assert(output < _EXEC_OUTPUT_MAX);
227         assert(ident);
228         assert(nfd >= 0);
229
230         fd = socket(AF_UNIX, SOCK_STREAM, 0);
231         if (fd < 0)
232                 return -errno;
233
234         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
235         if (r < 0) {
236                 safe_close(fd);
237                 return -errno;
238         }
239
240         if (shutdown(fd, SHUT_RD) < 0) {
241                 safe_close(fd);
242                 return -errno;
243         }
244
245         fd_inc_sndbuf(fd, SNDBUF_SIZE);
246
247         dprintf(fd,
248                 "%s\n"
249                 "%s\n"
250                 "%i\n"
251                 "%i\n"
252                 "%i\n"
253                 "%i\n"
254                 "%i\n",
255                 context->syslog_identifier ? context->syslog_identifier : ident,
256                 unit_id,
257                 context->syslog_priority,
258                 !!context->syslog_level_prefix,
259                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261                 is_terminal_output(output));
262
263         if (fd != nfd) {
264                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265                 safe_close(fd);
266         } else
267                 r = nfd;
268
269         return r;
270 }
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * makes it available as file descriptor nfd. Returns nfd on success,
 * a negative errno-style value on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the requested number? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        ret = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        safe_close(tty_fd);

        return ret;
}
288
289 static bool is_terminal_input(ExecInput i) {
290         return
291                 i == EXEC_INPUT_TTY ||
292                 i == EXEC_INPUT_TTY_FORCE ||
293                 i == EXEC_INPUT_TTY_FAIL;
294 }
295
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
297
298         if (is_terminal_input(std_input) && !apply_tty_stdin)
299                 return EXEC_INPUT_NULL;
300
301         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302                 return EXEC_INPUT_NULL;
303
304         return std_input;
305 }
306
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
308
309         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310                 return EXEC_OUTPUT_INHERIT;
311
312         return std_output;
313 }
314
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
316         ExecInput i;
317
318         assert(context);
319
320         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
321
322         switch (i) {
323
324         case EXEC_INPUT_NULL:
325                 return open_null_as(O_RDONLY, STDIN_FILENO);
326
327         case EXEC_INPUT_TTY:
328         case EXEC_INPUT_TTY_FORCE:
329         case EXEC_INPUT_TTY_FAIL: {
330                 int fd, r;
331
332                 fd = acquire_terminal(tty_path(context),
333                                       i == EXEC_INPUT_TTY_FAIL,
334                                       i == EXEC_INPUT_TTY_FORCE,
335                                       false,
336                                       USEC_INFINITY);
337                 if (fd < 0)
338                         return fd;
339
340                 if (fd != STDIN_FILENO) {
341                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342                         safe_close(fd);
343                 } else
344                         r = STDIN_FILENO;
345
346                 return r;
347         }
348
349         case EXEC_INPUT_SOCKET:
350                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
351
352         default:
353                 assert_not_reached("Unknown input type");
354         }
355 }
356
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
358         ExecOutput o;
359         ExecInput i;
360         int r;
361
362         assert(context);
363         assert(ident);
364
365         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366         o = fixup_output(context->std_output, socket_fd);
367
368         if (fileno == STDERR_FILENO) {
369                 ExecOutput e;
370                 e = fixup_output(context->std_error, socket_fd);
371
372                 /* This expects the input and output are already set up */
373
374                 /* Don't change the stderr file descriptor if we inherit all
375                  * the way and are not on a tty */
376                 if (e == EXEC_OUTPUT_INHERIT &&
377                     o == EXEC_OUTPUT_INHERIT &&
378                     i == EXEC_INPUT_NULL &&
379                     !is_terminal_input(context->std_input) &&
380                     getppid () != 1)
381                         return fileno;
382
383                 /* Duplicate from stdout if possible */
384                 if (e == o || e == EXEC_OUTPUT_INHERIT)
385                         return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
386
387                 o = e;
388
389         } else if (o == EXEC_OUTPUT_INHERIT) {
390                 /* If input got downgraded, inherit the original value */
391                 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392                         return open_terminal_as(tty_path(context), O_WRONLY, fileno);
393
394                 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395                 if (i != EXEC_INPUT_NULL)
396                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
397
398                 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
399                 if (getppid() != 1)
400                         return fileno;
401
402                 /* We need to open /dev/null here anew, to get the right access mode. */
403                 return open_null_as(O_WRONLY, fileno);
404         }
405
406         switch (o) {
407
408         case EXEC_OUTPUT_NULL:
409                 return open_null_as(O_WRONLY, fileno);
410
411         case EXEC_OUTPUT_TTY:
412                 if (is_terminal_input(i))
413                         return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
414
415                 /* We don't reset the terminal if this is just about output */
416                 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
417
418         case EXEC_OUTPUT_SYSLOG:
419         case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420         case EXEC_OUTPUT_KMSG:
421         case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422         case EXEC_OUTPUT_JOURNAL:
423         case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424                 r = connect_logger_as(context, o, ident, unit_id, fileno);
425                 if (r < 0) {
426                         log_struct_unit(LOG_CRIT, unit_id,
427                                 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428                                 fileno == STDOUT_FILENO ? "out" : "err",
429                                 unit_id, strerror(-r),
430                                 "ERRNO=%d", -r,
431                                 NULL);
432                         r = open_null_as(O_WRONLY, fileno);
433                 }
434                 return r;
435
436         case EXEC_OUTPUT_SOCKET:
437                 assert(socket_fd >= 0);
438                 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
439
440         default:
441                 assert_not_reached("Unknown error type");
442         }
443 }
444
445 static int chown_terminal(int fd, uid_t uid) {
446         struct stat st;
447
448         assert(fd >= 0);
449
450         /* This might fail. What matters are the results. */
451         (void) fchown(fd, uid, -1);
452         (void) fchmod(fd, TTY_MODE);
453
454         if (fstat(fd, &st) < 0)
455                 return -errno;
456
457         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
458                 return -EPERM;
459
460         return 0;
461 }
462
463 static int setup_confirm_stdio(int *_saved_stdin,
464                                int *_saved_stdout) {
465         int fd = -1, saved_stdin, saved_stdout = -1, r;
466
467         assert(_saved_stdin);
468         assert(_saved_stdout);
469
470         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
471         if (saved_stdin < 0)
472                 return -errno;
473
474         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475         if (saved_stdout < 0) {
476                 r = errno;
477                 goto fail;
478         }
479
480         fd = acquire_terminal(
481                         "/dev/console",
482                         false,
483                         false,
484                         false,
485                         DEFAULT_CONFIRM_USEC);
486         if (fd < 0) {
487                 r = fd;
488                 goto fail;
489         }
490
491         r = chown_terminal(fd, getuid());
492         if (r < 0)
493                 goto fail;
494
495         if (dup2(fd, STDIN_FILENO) < 0) {
496                 r = -errno;
497                 goto fail;
498         }
499
500         if (dup2(fd, STDOUT_FILENO) < 0) {
501                 r = -errno;
502                 goto fail;
503         }
504
505         if (fd >= 2)
506                 safe_close(fd);
507
508         *_saved_stdin = saved_stdin;
509         *_saved_stdout = saved_stdout;
510
511         return 0;
512
513 fail:
514         safe_close(saved_stdout);
515         safe_close(saved_stdin);
516         safe_close(fd);
517
518         return r;
519 }
520
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522         _cleanup_close_ int fd = -1;
523         va_list ap;
524
525         assert(format);
526
527         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
528         if (fd < 0)
529                 return fd;
530
531         va_start(ap, format);
532         vdprintf(fd, format, ap);
533         va_end(ap);
534
535         return 0;
536 }
537
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts
 * the saved descriptors back in place as stdin/stdout and closes the
 * saved copies. Returns 0 on success, otherwise the negative
 * errno-style value of the last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return ret;
}
561
562 static int ask_for_confirmation(char *response, char **argv) {
563         int saved_stdout = -1, saved_stdin = -1, r;
564         _cleanup_free_ char *line = NULL;
565
566         r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
567         if (r < 0)
568                 return r;
569
570         line = exec_command_line(argv);
571         if (!line)
572                 return -ENOMEM;
573
574         r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
575
576         restore_confirm_stdio(&saved_stdin, &saved_stdout);
577
578         return r;
579 }
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 _cleanup_cap_free_ cap_t d = NULL;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, while we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 d = cap_dup(context->capabilities);
688                 if (!d)
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
693                         return -errno;
694
695                 if (cap_set_proc(d) < 0)
696                         return -errno;
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the calling process to the last path component of path,
 * wrapped in parentheses. Only the final 8 characters of overlong
 * names are used; an empty name results in "(...)". */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        /* The result must not exceed 10 characters (i.e. the length
         * of "/sbin/init"), so that it looks pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail: it is usually the more telling part,
                 * as the beginning might just be "systemd-" */
                base += len - 8;
                len = 8;
        }

        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = 0;

        rename_process(name);
}
938
939 #ifdef HAVE_SECCOMP
940
941 static int apply_seccomp(ExecContext *c) {
942         uint32_t negative_action, action;
943         scmp_filter_ctx *seccomp;
944         Iterator i;
945         void *id;
946         int r;
947
948         assert(c);
949
950         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
951
952         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
953         if (!seccomp)
954                 return -ENOMEM;
955
956         if (c->syscall_archs) {
957
958                 SET_FOREACH(id, c->syscall_archs, i) {
959                         r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
960                         if (r == -EEXIST)
961                                 continue;
962                         if (r < 0)
963                                 goto finish;
964                 }
965
966         } else {
967                 r = seccomp_add_secondary_archs(seccomp);
968                 if (r < 0)
969                         goto finish;
970         }
971
972         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
973         SET_FOREACH(id, c->syscall_filter, i) {
974                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
975                 if (r < 0)
976                         goto finish;
977         }
978
979         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
980         if (r < 0)
981                 goto finish;
982
983         r = seccomp_load(seccomp);
984
985 finish:
986         seccomp_release(seccomp);
987         return r;
988 }
989
990 static int apply_address_families(ExecContext *c) {
991         scmp_filter_ctx *seccomp;
992         Iterator i;
993         int r;
994
995         assert(c);
996
997         seccomp = seccomp_init(SCMP_ACT_ALLOW);
998         if (!seccomp)
999                 return -ENOMEM;
1000
1001         r = seccomp_add_secondary_archs(seccomp);
1002         if (r < 0)
1003                 goto finish;
1004
1005         if (c->address_families_whitelist) {
1006                 int af, first = 0, last = 0;
1007                 void *afp;
1008
1009                 /* If this is a whitelist, we first block the address
1010                  * families that are out of range and then everything
1011                  * that is not in the set. First, we find the lowest
1012                  * and highest address family in the set. */
1013
1014                 SET_FOREACH(afp, c->address_families, i) {
1015                         af = PTR_TO_INT(afp);
1016
1017                         if (af <= 0 || af >= af_max())
1018                                 continue;
1019
1020                         if (first == 0 || af < first)
1021                                 first = af;
1022
1023                         if (last == 0 || af > last)
1024                                 last = af;
1025                 }
1026
1027                 assert((first == 0) == (last == 0));
1028
1029                 if (first == 0) {
1030
1031                         /* No entries in the valid range, block everything */
1032                         r = seccomp_rule_add(
1033                                         seccomp,
1034                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1035                                         SCMP_SYS(socket),
1036                                         0);
1037                         if (r < 0)
1038                                 goto finish;
1039
1040                 } else {
1041
1042                         /* Block everything below the first entry */
1043                         r = seccomp_rule_add(
1044                                         seccomp,
1045                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1046                                         SCMP_SYS(socket),
1047                                         1,
1048                                         SCMP_A0(SCMP_CMP_LT, first));
1049                         if (r < 0)
1050                                 goto finish;
1051
1052                         /* Block everything above the last entry */
1053                         r = seccomp_rule_add(
1054                                         seccomp,
1055                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1056                                         SCMP_SYS(socket),
1057                                         1,
1058                                         SCMP_A0(SCMP_CMP_GT, last));
1059                         if (r < 0)
1060                                 goto finish;
1061
1062                         /* Block everything between the first and last
1063                          * entry */
1064                         for (af = 1; af < af_max(); af++) {
1065
1066                                 if (set_contains(c->address_families, INT_TO_PTR(af)))
1067                                         continue;
1068
1069                                 r = seccomp_rule_add(
1070                                                 seccomp,
1071                                                 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1072                                                 SCMP_SYS(socket),
1073                                                 1,
1074                                                 SCMP_A0(SCMP_CMP_EQ, af));
1075                                 if (r < 0)
1076                                         goto finish;
1077                         }
1078                 }
1079
1080         } else {
1081                 void *af;
1082
1083                 /* If this is a blacklist, then generate one rule for
1084                  * each address family that are then combined in OR
1085                  * checks. */
1086
1087                 SET_FOREACH(af, c->address_families, i) {
1088
1089                         r = seccomp_rule_add(
1090                                         seccomp,
1091                                         SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092                                         SCMP_SYS(socket),
1093                                         1,
1094                                         SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1095                         if (r < 0)
1096                                 goto finish;
1097                 }
1098         }
1099
1100         r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1101         if (r < 0)
1102                 goto finish;
1103
1104         r = seccomp_load(seccomp);
1105
1106 finish:
1107         seccomp_release(seccomp);
1108         return r;
1109 }
1110
1111 #endif
1112
1113 static void do_idle_pipe_dance(int idle_pipe[4]) {
1114         assert(idle_pipe);
1115
1116
1117         safe_close(idle_pipe[1]);
1118         safe_close(idle_pipe[2]);
1119
1120         if (idle_pipe[0] >= 0) {
1121                 int r;
1122
1123                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1124
1125                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1126                         /* Signal systemd that we are bored and want to continue. */
1127                         write(idle_pipe[3], "x", 1);
1128
1129                         /* Wait for systemd to react to the signal above. */
1130                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1131                 }
1132
1133                 safe_close(idle_pipe[0]);
1134
1135         }
1136
1137         safe_close(idle_pipe[3]);
1138 }
1139
1140 static int build_environment(
1141                 const ExecContext *c,
1142                 unsigned n_fds,
1143                 usec_t watchdog_usec,
1144                 const char *home,
1145                 const char *username,
1146                 const char *shell,
1147                 char ***ret) {
1148
1149         _cleanup_strv_free_ char **our_env = NULL;
1150         unsigned n_env = 0;
1151         char *x;
1152
1153         assert(c);
1154         assert(ret);
1155
1156         our_env = new0(char*, 10);
1157         if (!our_env)
1158                 return -ENOMEM;
1159
1160         if (n_fds > 0) {
1161                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1162                         return -ENOMEM;
1163                 our_env[n_env++] = x;
1164
1165                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1166                         return -ENOMEM;
1167                 our_env[n_env++] = x;
1168         }
1169
1170         if (watchdog_usec > 0) {
1171                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1172                         return -ENOMEM;
1173                 our_env[n_env++] = x;
1174
1175                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1176                         return -ENOMEM;
1177                 our_env[n_env++] = x;
1178         }
1179
1180         if (home) {
1181                 x = strappend("HOME=", home);
1182                 if (!x)
1183                         return -ENOMEM;
1184                 our_env[n_env++] = x;
1185         }
1186
1187         if (username) {
1188                 x = strappend("LOGNAME=", username);
1189                 if (!x)
1190                         return -ENOMEM;
1191                 our_env[n_env++] = x;
1192
1193                 x = strappend("USER=", username);
1194                 if (!x)
1195                         return -ENOMEM;
1196                 our_env[n_env++] = x;
1197         }
1198
1199         if (shell) {
1200                 x = strappend("SHELL=", shell);
1201                 if (!x)
1202                         return -ENOMEM;
1203                 our_env[n_env++] = x;
1204         }
1205
1206         if (is_terminal_input(c->std_input) ||
1207             c->std_output == EXEC_OUTPUT_TTY ||
1208             c->std_error == EXEC_OUTPUT_TTY ||
1209             c->tty_path) {
1210
1211                 x = strdup(default_term_for_tty(tty_path(c)));
1212                 if (!x)
1213                         return -ENOMEM;
1214                 our_env[n_env++] = x;
1215         }
1216
1217         our_env[n_env++] = NULL;
1218         assert(n_env <= 10);
1219
1220         *ret = our_env;
1221         our_env = NULL;
1222
1223         return 0;
1224 }
1225
1226 static int exec_child(ExecCommand *command,
1227                       const ExecContext *context,
1228                       const ExecParameters *params,
1229                       ExecRuntime *runtime,
1230                       char **argv,
1231                       int socket_fd,
1232                       int *fds, unsigned n_fds,
1233                       char **files_env,
1234                       int *error) {
1235
1236         _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1237         const char *username = NULL, *home = NULL, *shell = NULL;
1238         unsigned n_dont_close = 0;
1239         int dont_close[n_fds + 3];
1240         uid_t uid = (uid_t) -1;
1241         gid_t gid = (gid_t) -1;
1242         int i, err;
1243
1244         assert(command);
1245         assert(context);
1246         assert(params);
1247         assert(error);
1248
1249         rename_process_from_path(command->path);
1250
1251         /* We reset exactly these signals, since they are the
1252          * only ones we set to SIG_IGN in the main daemon. All
1253          * others we leave untouched because we set them to
1254          * SIG_DFL or a valid handler initially, both of which
1255          * will be demoted to SIG_DFL. */
1256         default_signals(SIGNALS_CRASH_HANDLER,
1257                         SIGNALS_IGNORE, -1);
1258
1259         if (context->ignore_sigpipe)
1260                 ignore_signals(SIGPIPE, -1);
1261
1262         err = reset_signal_mask();
1263         if (err < 0) {
1264                 *error = EXIT_SIGNAL_MASK;
1265                 return err;
1266         }
1267
1268         if (params->idle_pipe)
1269                 do_idle_pipe_dance(params->idle_pipe);
1270
1271         /* Close sockets very early to make sure we don't
1272          * block init reexecution because it cannot bind its
1273          * sockets */
1274         log_forget_fds();
1275
1276         if (socket_fd >= 0)
1277                 dont_close[n_dont_close++] = socket_fd;
1278         if (n_fds > 0) {
1279                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1280                 n_dont_close += n_fds;
1281         }
1282         if (runtime) {
1283                 if (runtime->netns_storage_socket[0] >= 0)
1284                         dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1285                 if (runtime->netns_storage_socket[1] >= 0)
1286                         dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1287         }
1288
1289         err = close_all_fds(dont_close, n_dont_close);
1290         if (err < 0) {
1291                 *error = EXIT_FDS;
1292                 return err;
1293         }
1294
1295         if (!context->same_pgrp)
1296                 if (setsid() < 0) {
1297                         *error = EXIT_SETSID;
1298                         return -errno;
1299                 }
1300
1301         exec_context_tty_reset(context);
1302
1303         if (params->confirm_spawn) {
1304                 char response;
1305
1306                 err = ask_for_confirmation(&response, argv);
1307                 if (err == -ETIMEDOUT)
1308                         write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1309                 else if (err < 0)
1310                         write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1311                 else if (response == 's') {
1312                         write_confirm_message("Skipping execution.\n");
1313                         *error = EXIT_CONFIRM;
1314                         return -ECANCELED;
1315                 } else if (response == 'n') {
1316                         write_confirm_message("Failing execution.\n");
1317                         *error = 0;
1318                         return 0;
1319                 }
1320         }
1321
1322         /* If a socket is connected to STDIN/STDOUT/STDERR, we
1323          * must sure to drop O_NONBLOCK */
1324         if (socket_fd >= 0)
1325                 fd_nonblock(socket_fd, false);
1326
1327         err = setup_input(context, socket_fd, params->apply_tty_stdin);
1328         if (err < 0) {
1329                 *error = EXIT_STDIN;
1330                 return err;
1331         }
1332
1333         err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1334         if (err < 0) {
1335                 *error = EXIT_STDOUT;
1336                 return err;
1337         }
1338
1339         err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1340         if (err < 0) {
1341                 *error = EXIT_STDERR;
1342                 return err;
1343         }
1344
1345         if (params->cgroup_path) {
1346                 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1347                 if (err < 0) {
1348                         *error = EXIT_CGROUP;
1349                         return err;
1350                 }
1351         }
1352
1353         if (context->oom_score_adjust_set) {
1354                 char t[16];
1355
1356                 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1357                 char_array_0(t);
1358
1359                 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1360                         *error = EXIT_OOM_ADJUST;
1361                         return -errno;
1362                 }
1363         }
1364
1365         if (context->nice_set)
1366                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1367                         *error = EXIT_NICE;
1368                         return -errno;
1369                 }
1370
1371         if (context->cpu_sched_set) {
1372                 struct sched_param param = {
1373                         .sched_priority = context->cpu_sched_priority,
1374                 };
1375
1376                 err = sched_setscheduler(0,
1377                                          context->cpu_sched_policy |
1378                                          (context->cpu_sched_reset_on_fork ?
1379                                           SCHED_RESET_ON_FORK : 0),
1380                                          &param);
1381                 if (err < 0) {
1382                         *error = EXIT_SETSCHEDULER;
1383                         return -errno;
1384                 }
1385         }
1386
1387         if (context->cpuset)
1388                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1389                         *error = EXIT_CPUAFFINITY;
1390                         return -errno;
1391                 }
1392
1393         if (context->ioprio_set)
1394                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1395                         *error = EXIT_IOPRIO;
1396                         return -errno;
1397                 }
1398
1399         if (context->timer_slack_nsec != NSEC_INFINITY)
1400                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1401                         *error = EXIT_TIMERSLACK;
1402                         return -errno;
1403                 }
1404
1405         if (context->personality != 0xffffffffUL)
1406                 if (personality(context->personality) < 0) {
1407                         *error = EXIT_PERSONALITY;
1408                         return -errno;
1409                 }
1410
1411         if (context->utmp_id)
1412                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1413
1414         if (context->user) {
1415                 username = context->user;
1416                 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1417                 if (err < 0) {
1418                         *error = EXIT_USER;
1419                         return err;
1420                 }
1421
1422                 if (is_terminal_input(context->std_input)) {
1423                         err = chown_terminal(STDIN_FILENO, uid);
1424                         if (err < 0) {
1425                                 *error = EXIT_STDIN;
1426                                 return err;
1427                         }
1428                 }
1429         }
1430
1431 #ifdef HAVE_PAM
1432         if (params->cgroup_path && context->user && context->pam_name) {
1433                 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1434                 if (err < 0) {
1435                         *error = EXIT_CGROUP;
1436                         return err;
1437                 }
1438
1439
1440                 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1441                 if (err < 0) {
1442                         *error = EXIT_CGROUP;
1443                         return err;
1444                 }
1445         }
1446 #endif
1447
1448         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1449                 char **rt;
1450
1451                 STRV_FOREACH(rt, context->runtime_directory) {
1452                         _cleanup_free_ char *p;
1453
1454                         p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1455                         if (!p) {
1456                                 *error = EXIT_RUNTIME_DIRECTORY;
1457                                 return -ENOMEM;
1458                         }
1459
1460                         err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1461                         if (err < 0) {
1462                                 *error = EXIT_RUNTIME_DIRECTORY;
1463                                 return err;
1464                         }
1465                 }
1466         }
1467
1468         if (params->apply_permissions) {
1469                 err = enforce_groups(context, username, gid);
1470                 if (err < 0) {
1471                         *error = EXIT_GROUP;
1472                         return err;
1473                 }
1474         }
1475
1476         umask(context->umask);
1477
1478 #ifdef HAVE_PAM
1479         if (params->apply_permissions && context->pam_name && username) {
1480                 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1481                 if (err < 0) {
1482                         *error = EXIT_PAM;
1483                         return err;
1484                 }
1485         }
1486 #endif
1487
1488         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1489                 err = setup_netns(runtime->netns_storage_socket);
1490                 if (err < 0) {
1491                         *error = EXIT_NETWORK;
1492                         return err;
1493                 }
1494         }
1495
1496         if (!strv_isempty(context->read_write_dirs) ||
1497             !strv_isempty(context->read_only_dirs) ||
1498             !strv_isempty(context->inaccessible_dirs) ||
1499             context->mount_flags != 0 ||
1500             (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1501             context->private_devices ||
1502             context->protect_system != PROTECT_SYSTEM_NO ||
1503             context->protect_home != PROTECT_HOME_NO) {
1504
1505                 char *tmp = NULL, *var = NULL;
1506
1507                 /* The runtime struct only contains the parent
1508                  * of the private /tmp, which is
1509                  * non-accessible to world users. Inside of it
1510                  * there's a /tmp that is sticky, and that's
1511                  * the one we want to use here. */
1512
1513                 if (context->private_tmp && runtime) {
1514                         if (runtime->tmp_dir)
1515                                 tmp = strappenda(runtime->tmp_dir, "/tmp");
1516                         if (runtime->var_tmp_dir)
1517                                 var = strappenda(runtime->var_tmp_dir, "/tmp");
1518                 }
1519
1520                 err = setup_namespace(
1521                                 context->read_write_dirs,
1522                                 context->read_only_dirs,
1523                                 context->inaccessible_dirs,
1524                                 tmp,
1525                                 var,
1526                                 NULL,
1527                                 context->private_devices,
1528                                 context->protect_home,
1529                                 context->protect_system,
1530                                 context->mount_flags);
1531                 if (err < 0) {
1532                         *error = EXIT_NAMESPACE;
1533                         return err;
1534                 }
1535         }
1536
1537         if (params->apply_chroot) {
1538                 if (context->root_directory)
1539                         if (chroot(context->root_directory) < 0) {
1540                                 *error = EXIT_CHROOT;
1541                                 return -errno;
1542                         }
1543
1544                 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1545                         *error = EXIT_CHDIR;
1546                         return -errno;
1547                 }
1548         } else {
1549                 _cleanup_free_ char *d = NULL;
1550
1551                 if (asprintf(&d, "%s/%s",
1552                              context->root_directory ? context->root_directory : "",
1553                              context->working_directory ? context->working_directory : "") < 0) {
1554                         *error = EXIT_MEMORY;
1555                         return -ENOMEM;
1556                 }
1557
1558                 if (chdir(d) < 0) {
1559                         *error = EXIT_CHDIR;
1560                         return -errno;
1561                 }
1562         }
1563
1564         /* We repeat the fd closing here, to make sure that
1565          * nothing is leaked from the PAM modules. Note that
1566          * we are more aggressive this time since socket_fd
1567          * and the netns fds we don#t need anymore. */
1568         err = close_all_fds(fds, n_fds);
1569         if (err >= 0)
1570                 err = shift_fds(fds, n_fds);
1571         if (err >= 0)
1572                 err = flags_fds(fds, n_fds, context->non_blocking);
1573         if (err < 0) {
1574                 *error = EXIT_FDS;
1575                 return err;
1576         }
1577
1578         if (params->apply_permissions) {
1579
1580                 for (i = 0; i < _RLIMIT_MAX; i++) {
1581                         if (!context->rlimit[i])
1582                                 continue;
1583
1584                         if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1585                                 *error = EXIT_LIMITS;
1586                                 return -errno;
1587                         }
1588                 }
1589
1590                 if (context->capability_bounding_set_drop) {
1591                         err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1592                         if (err < 0) {
1593                                 *error = EXIT_CAPABILITIES;
1594                                 return err;
1595                         }
1596                 }
1597
1598                 if (context->user) {
1599                         err = enforce_user(context, uid);
1600                         if (err < 0) {
1601                                 *error = EXIT_USER;
1602                                 return err;
1603                         }
1604                 }
1605
1606                 /* PR_GET_SECUREBITS is not privileged, while
1607                  * PR_SET_SECUREBITS is. So to suppress
1608                  * potential EPERMs we'll try not to call
1609                  * PR_SET_SECUREBITS unless necessary. */
1610                 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1611                         if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1612                                 *error = EXIT_SECUREBITS;
1613                                 return -errno;
1614                         }
1615
1616                 if (context->capabilities)
1617                         if (cap_set_proc(context->capabilities) < 0) {
1618                                 *error = EXIT_CAPABILITIES;
1619                                 return -errno;
1620                         }
1621
1622                 if (context->no_new_privileges)
1623                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1624                                 *error = EXIT_NO_NEW_PRIVILEGES;
1625                                 return -errno;
1626                         }
1627
1628 #ifdef HAVE_SECCOMP
1629                 if (context->address_families_whitelist ||
1630                     !set_isempty(context->address_families)) {
1631                         err = apply_address_families(context);
1632                         if (err < 0) {
1633                                 *error = EXIT_ADDRESS_FAMILIES;
1634                                 return err;
1635                         }
1636                 }
1637
1638                 if (context->syscall_whitelist ||
1639                     !set_isempty(context->syscall_filter) ||
1640                     !set_isempty(context->syscall_archs)) {
1641                         err = apply_seccomp(context);
1642                         if (err < 0) {
1643                                 *error = EXIT_SECCOMP;
1644                                 return err;
1645                         }
1646                 }
1647 #endif
1648
1649 #ifdef HAVE_SELINUX
1650                 if (context->selinux_context && use_selinux()) {
1651                         err = setexeccon(context->selinux_context);
1652                         if (err < 0 && !context->selinux_context_ignore) {
1653                                 *error = EXIT_SELINUX_CONTEXT;
1654                                 return err;
1655                         }
1656                 }
1657 #endif
1658
1659 #ifdef HAVE_APPARMOR
1660                 if (context->apparmor_profile && use_apparmor()) {
1661                         err = aa_change_onexec(context->apparmor_profile);
1662                         if (err < 0 && !context->apparmor_profile_ignore) {
1663                                 *error = EXIT_APPARMOR_PROFILE;
1664                                 return err;
1665                         }
1666                 }
1667 #endif
1668         }
1669
1670         err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1671         if (err < 0) {
1672                 *error = EXIT_MEMORY;
1673                 return err;
1674         }
1675
1676         final_env = strv_env_merge(5,
1677                                    params->environment,
1678                                    our_env,
1679                                    context->environment,
1680                                    files_env,
1681                                    pam_env,
1682                                    NULL);
1683         if (!final_env) {
1684                 *error = EXIT_MEMORY;
1685                 return -ENOMEM;
1686         }
1687
1688         final_argv = replace_env_argv(argv, final_env);
1689         if (!final_argv) {
1690                 *error = EXIT_MEMORY;
1691                 return -ENOMEM;
1692         }
1693
1694         final_env = strv_env_clean(final_env);
1695
1696         if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1697                 _cleanup_free_ char *line;
1698
1699                 line = exec_command_line(final_argv);
1700                 if (line) {
1701                         log_open();
1702                         log_struct_unit(LOG_DEBUG,
1703                                         params->unit_id,
1704                                         "EXECUTABLE=%s", command->path,
1705                                         "MESSAGE=Executing: %s", line,
1706                                         NULL);
1707                         log_close();
1708                 }
1709         }
1710         execve(command->path, final_argv, final_env);
1711         *error = EXIT_EXEC;
1712         return -errno;
1713 }
1714
1715 int exec_spawn(ExecCommand *command,
1716                const ExecContext *context,
1717                const ExecParameters *params,
1718                ExecRuntime *runtime,
1719                pid_t *ret) {
1720
1721         _cleanup_strv_free_ char **files_env = NULL;
1722         int *fds = NULL; unsigned n_fds = 0;
1723         char *line, **argv;
1724         int socket_fd;
1725         pid_t pid;
1726         int err;
1727
1728         assert(command);
1729         assert(context);
1730         assert(ret);
1731         assert(params);
1732         assert(params->fds || params->n_fds <= 0);
1733
1734         if (context->std_input == EXEC_INPUT_SOCKET ||
1735             context->std_output == EXEC_OUTPUT_SOCKET ||
1736             context->std_error == EXEC_OUTPUT_SOCKET) {
1737
1738                 if (params->n_fds != 1)
1739                         return -EINVAL;
1740
1741                 socket_fd = params->fds[0];
1742         } else {
1743                 socket_fd = -1;
1744                 fds = params->fds;
1745                 n_fds = params->n_fds;
1746         }
1747
1748         err = exec_context_load_environment(context, &files_env);
1749         if (err < 0) {
1750                 log_struct_unit(LOG_ERR,
1751                            params->unit_id,
1752                            "MESSAGE=Failed to load environment files: %s", strerror(-err),
1753                            "ERRNO=%d", -err,
1754                            NULL);
1755                 return err;
1756         }
1757
1758         argv = params->argv ?: command->argv;
1759
1760         line = exec_command_line(argv);
1761         if (!line)
1762                 return log_oom();
1763
1764         log_struct_unit(LOG_DEBUG,
1765                         params->unit_id,
1766                         "EXECUTABLE=%s", command->path,
1767                         "MESSAGE=About to execute: %s", line,
1768                         NULL);
1769         free(line);
1770
1771         pid = fork();
1772         if (pid < 0)
1773                 return -errno;
1774
1775         if (pid == 0) {
1776                 int r;
1777
1778                 err = exec_child(command,
1779                                  context,
1780                                  params,
1781                                  runtime,
1782                                  argv,
1783                                  socket_fd,
1784                                  fds, n_fds,
1785                                  files_env,
1786                                  &r);
1787                 if (r != 0) {
1788                         log_open();
1789                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1790                                    "EXECUTABLE=%s", command->path,
1791                                    "MESSAGE=Failed at step %s spawning %s: %s",
1792                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1793                                           command->path, strerror(-err),
1794                                    "ERRNO=%d", -err,
1795                                    NULL);
1796                         log_close();
1797                 }
1798
1799                 _exit(r);
1800         }
1801
1802         log_struct_unit(LOG_DEBUG,
1803                         params->unit_id,
1804                         "MESSAGE=Forked %s as "PID_FMT,
1805                         command->path, pid,
1806                         NULL);
1807
1808         /* We add the new process to the cgroup both in the child (so
1809          * that we can be sure that no user code is ever executed
1810          * outside of the cgroup) and in the parent (so that we can be
1811          * sure that when we kill the cgroup the process will be
1812          * killed too). */
1813         if (params->cgroup_path)
1814                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1815
1816         exec_status_start(&command->exec_status, pid);
1817
1818         *ret = pid;
1819         return 0;
1820 }
1821
1822 void exec_context_init(ExecContext *c) {
1823         assert(c);
1824
1825         c->umask = 0022;
1826         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1827         c->cpu_sched_policy = SCHED_OTHER;
1828         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1829         c->syslog_level_prefix = true;
1830         c->ignore_sigpipe = true;
1831         c->timer_slack_nsec = NSEC_INFINITY;
1832         c->personality = 0xffffffffUL;
1833         c->runtime_directory_mode = 0755;
1834 }
1835
1836 void exec_context_done(ExecContext *c) {
1837         unsigned l;
1838
1839         assert(c);
1840
1841         strv_free(c->environment);
1842         c->environment = NULL;
1843
1844         strv_free(c->environment_files);
1845         c->environment_files = NULL;
1846
1847         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1848                 free(c->rlimit[l]);
1849                 c->rlimit[l] = NULL;
1850         }
1851
1852         free(c->working_directory);
1853         c->working_directory = NULL;
1854         free(c->root_directory);
1855         c->root_directory = NULL;
1856
1857         free(c->tty_path);
1858         c->tty_path = NULL;
1859
1860         free(c->syslog_identifier);
1861         c->syslog_identifier = NULL;
1862
1863         free(c->user);
1864         c->user = NULL;
1865
1866         free(c->group);
1867         c->group = NULL;
1868
1869         strv_free(c->supplementary_groups);
1870         c->supplementary_groups = NULL;
1871
1872         free(c->pam_name);
1873         c->pam_name = NULL;
1874
1875         if (c->capabilities) {
1876                 cap_free(c->capabilities);
1877                 c->capabilities = NULL;
1878         }
1879
1880         strv_free(c->read_only_dirs);
1881         c->read_only_dirs = NULL;
1882
1883         strv_free(c->read_write_dirs);
1884         c->read_write_dirs = NULL;
1885
1886         strv_free(c->inaccessible_dirs);
1887         c->inaccessible_dirs = NULL;
1888
1889         if (c->cpuset)
1890                 CPU_FREE(c->cpuset);
1891
1892         free(c->utmp_id);
1893         c->utmp_id = NULL;
1894
1895         free(c->selinux_context);
1896         c->selinux_context = NULL;
1897
1898         free(c->apparmor_profile);
1899         c->apparmor_profile = NULL;
1900
1901         set_free(c->syscall_filter);
1902         c->syscall_filter = NULL;
1903
1904         set_free(c->syscall_archs);
1905         c->syscall_archs = NULL;
1906
1907         set_free(c->address_families);
1908         c->address_families = NULL;
1909
1910         strv_free(c->runtime_directory);
1911         c->runtime_directory = NULL;
1912
1913         bus_endpoint_free(c->bus_endpoint);
1914         c->bus_endpoint = NULL;
1915 }
1916
1917 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1918         char **i;
1919
1920         assert(c);
1921
1922         if (!runtime_prefix)
1923                 return 0;
1924
1925         STRV_FOREACH(i, c->runtime_directory) {
1926                 _cleanup_free_ char *p;
1927
1928                 p = strjoin(runtime_prefix, "/", *i, NULL);
1929                 if (!p)
1930                         return -ENOMEM;
1931
1932                 /* We execute this synchronously, since we need to be
1933                  * sure this is gone when we start the service
1934                  * next. */
1935                 rm_rf_dangerous(p, false, true, false);
1936         }
1937
1938         return 0;
1939 }
1940
1941 void exec_command_done(ExecCommand *c) {
1942         assert(c);
1943
1944         free(c->path);
1945         c->path = NULL;
1946
1947         strv_free(c->argv);
1948         c->argv = NULL;
1949 }
1950
1951 void exec_command_done_array(ExecCommand *c, unsigned n) {
1952         unsigned i;
1953
1954         for (i = 0; i < n; i++)
1955                 exec_command_done(c+i);
1956 }
1957
1958 void exec_command_free_list(ExecCommand *c) {
1959         ExecCommand *i;
1960
1961         while ((i = c)) {
1962                 LIST_REMOVE(command, c, i);
1963                 exec_command_done(i);
1964                 free(i);
1965         }
1966 }
1967
1968 void exec_command_free_array(ExecCommand **c, unsigned n) {
1969         unsigned i;
1970
1971         for (i = 0; i < n; i++) {
1972                 exec_command_free_list(c[i]);
1973                 c[i] = NULL;
1974         }
1975 }
1976
1977 int exec_context_load_environment(const ExecContext *c, char ***l) {
1978         char **i, **r = NULL;
1979
1980         assert(c);
1981         assert(l);
1982
1983         STRV_FOREACH(i, c->environment_files) {
1984                 char *fn;
1985                 int k;
1986                 bool ignore = false;
1987                 char **p;
1988                 _cleanup_globfree_ glob_t pglob = {};
1989                 int count, n;
1990
1991                 fn = *i;
1992
1993                 if (fn[0] == '-') {
1994                         ignore = true;
1995                         fn ++;
1996                 }
1997
1998                 if (!path_is_absolute(fn)) {
1999                         if (ignore)
2000                                 continue;
2001
2002                         strv_free(r);
2003                         return -EINVAL;
2004                 }
2005
2006                 /* Filename supports globbing, take all matching files */
2007                 errno = 0;
2008                 if (glob(fn, 0, NULL, &pglob) != 0) {
2009                         if (ignore)
2010                                 continue;
2011
2012                         strv_free(r);
2013                         return errno ? -errno : -EINVAL;
2014                 }
2015                 count = pglob.gl_pathc;
2016                 if (count == 0) {
2017                         if (ignore)
2018                                 continue;
2019
2020                         strv_free(r);
2021                         return -EINVAL;
2022                 }
2023                 for (n = 0; n < count; n++) {
2024                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2025                         if (k < 0) {
2026                                 if (ignore)
2027                                         continue;
2028
2029                                 strv_free(r);
2030                                 return k;
2031                         }
2032                         /* Log invalid environment variables with filename */
2033                         if (p)
2034                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2035
2036                         if (r == NULL)
2037                                 r = p;
2038                         else {
2039                                 char **m;
2040
2041                                 m = strv_env_merge(2, r, p);
2042                                 strv_free(r);
2043                                 strv_free(p);
2044                                 if (!m)
2045                                         return -ENOMEM;
2046
2047                                 r = m;
2048                         }
2049                 }
2050         }
2051
2052         *l = r;
2053
2054         return 0;
2055 }
2056
2057 static bool tty_may_match_dev_console(const char *tty) {
2058         _cleanup_free_ char *active = NULL;
2059        char *console;
2060
2061         if (startswith(tty, "/dev/"))
2062                 tty += 5;
2063
2064         /* trivial identity? */
2065         if (streq(tty, "console"))
2066                 return true;
2067
2068         console = resolve_dev_console(&active);
2069         /* if we could not resolve, assume it may */
2070         if (!console)
2071                 return true;
2072
2073         /* "tty0" means the active VC, so it may be the same sometimes */
2074         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2075 }
2076
2077 bool exec_context_may_touch_console(ExecContext *ec) {
2078         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2079                 is_terminal_input(ec->std_input) ||
2080                 is_terminal_output(ec->std_output) ||
2081                 is_terminal_output(ec->std_error)) &&
2082                tty_may_match_dev_console(tty_path(ec));
2083 }
2084
/* Writes every string of the NULL-terminated vector l to f, each
 * preceded by a single space. A NULL vector writes nothing. */
static void strv_fprintf(FILE *f, char **l) {
        char **item;

        assert(f);

        if (!l)
                return;

        for (item = l; *item; item++)
                fprintf(f, " %s", *item);
}
2093
2094 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2095         char **e;
2096         unsigned i;
2097
2098         assert(c);
2099         assert(f);
2100
2101         prefix = strempty(prefix);
2102
2103         fprintf(f,
2104                 "%sUMask: %04o\n"
2105                 "%sWorkingDirectory: %s\n"
2106                 "%sRootDirectory: %s\n"
2107                 "%sNonBlocking: %s\n"
2108                 "%sPrivateTmp: %s\n"
2109                 "%sPrivateNetwork: %s\n"
2110                 "%sPrivateDevices: %s\n"
2111                 "%sProtectHome: %s\n"
2112                 "%sProtectSystem: %s\n"
2113                 "%sIgnoreSIGPIPE: %s\n",
2114                 prefix, c->umask,
2115                 prefix, c->working_directory ? c->working_directory : "/",
2116                 prefix, c->root_directory ? c->root_directory : "/",
2117                 prefix, yes_no(c->non_blocking),
2118                 prefix, yes_no(c->private_tmp),
2119                 prefix, yes_no(c->private_network),
2120                 prefix, yes_no(c->private_devices),
2121                 prefix, protect_home_to_string(c->protect_home),
2122                 prefix, protect_system_to_string(c->protect_system),
2123                 prefix, yes_no(c->ignore_sigpipe));
2124
2125         STRV_FOREACH(e, c->environment)
2126                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2127
2128         STRV_FOREACH(e, c->environment_files)
2129                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2130
2131         if (c->nice_set)
2132                 fprintf(f,
2133                         "%sNice: %i\n",
2134                         prefix, c->nice);
2135
2136         if (c->oom_score_adjust_set)
2137                 fprintf(f,
2138                         "%sOOMScoreAdjust: %i\n",
2139                         prefix, c->oom_score_adjust);
2140
2141         for (i = 0; i < RLIM_NLIMITS; i++)
2142                 if (c->rlimit[i])
2143                         fprintf(f, "%s%s: "RLIM_FMT"\n",
2144                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2145
2146         if (c->ioprio_set) {
2147                 _cleanup_free_ char *class_str = NULL;
2148
2149                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2150                 fprintf(f,
2151                         "%sIOSchedulingClass: %s\n"
2152                         "%sIOPriority: %i\n",
2153                         prefix, strna(class_str),
2154                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2155         }
2156
2157         if (c->cpu_sched_set) {
2158                 _cleanup_free_ char *policy_str = NULL;
2159
2160                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2161                 fprintf(f,
2162                         "%sCPUSchedulingPolicy: %s\n"
2163                         "%sCPUSchedulingPriority: %i\n"
2164                         "%sCPUSchedulingResetOnFork: %s\n",
2165                         prefix, strna(policy_str),
2166                         prefix, c->cpu_sched_priority,
2167                         prefix, yes_no(c->cpu_sched_reset_on_fork));
2168         }
2169
2170         if (c->cpuset) {
2171                 fprintf(f, "%sCPUAffinity:", prefix);
2172                 for (i = 0; i < c->cpuset_ncpus; i++)
2173                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2174                                 fprintf(f, " %u", i);
2175                 fputs("\n", f);
2176         }
2177
2178         if (c->timer_slack_nsec != NSEC_INFINITY)
2179                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2180
2181         fprintf(f,
2182                 "%sStandardInput: %s\n"
2183                 "%sStandardOutput: %s\n"
2184                 "%sStandardError: %s\n",
2185                 prefix, exec_input_to_string(c->std_input),
2186                 prefix, exec_output_to_string(c->std_output),
2187                 prefix, exec_output_to_string(c->std_error));
2188
2189         if (c->tty_path)
2190                 fprintf(f,
2191                         "%sTTYPath: %s\n"
2192                         "%sTTYReset: %s\n"
2193                         "%sTTYVHangup: %s\n"
2194                         "%sTTYVTDisallocate: %s\n",
2195                         prefix, c->tty_path,
2196                         prefix, yes_no(c->tty_reset),
2197                         prefix, yes_no(c->tty_vhangup),
2198                         prefix, yes_no(c->tty_vt_disallocate));
2199
2200         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2201             c->std_output == EXEC_OUTPUT_KMSG ||
2202             c->std_output == EXEC_OUTPUT_JOURNAL ||
2203             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2204             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2205             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2206             c->std_error == EXEC_OUTPUT_SYSLOG ||
2207             c->std_error == EXEC_OUTPUT_KMSG ||
2208             c->std_error == EXEC_OUTPUT_JOURNAL ||
2209             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2210             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2211             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2212
2213                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2214
2215                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2216                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2217
2218                 fprintf(f,
2219                         "%sSyslogFacility: %s\n"
2220                         "%sSyslogLevel: %s\n",
2221                         prefix, strna(fac_str),
2222                         prefix, strna(lvl_str));
2223         }
2224
2225         if (c->capabilities) {
2226                 _cleanup_cap_free_charp_ char *t;
2227
2228                 t = cap_to_text(c->capabilities, NULL);
2229                 if (t)
2230                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2231         }
2232
2233         if (c->secure_bits)
2234                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2235                         prefix,
2236                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2237                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2238                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2239                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2240                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2241                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2242
2243         if (c->capability_bounding_set_drop) {
2244                 unsigned long l;
2245                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2246
2247                 for (l = 0; l <= cap_last_cap(); l++)
2248                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2249                                 _cleanup_cap_free_charp_ char *t;
2250
2251                                 t = cap_to_name(l);
2252                                 if (t)
2253                                         fprintf(f, " %s", t);
2254                         }
2255
2256                 fputs("\n", f);
2257         }
2258
2259         if (c->user)
2260                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2261         if (c->group)
2262                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2263
2264         if (strv_length(c->supplementary_groups) > 0) {
2265                 fprintf(f, "%sSupplementaryGroups:", prefix);
2266                 strv_fprintf(f, c->supplementary_groups);
2267                 fputs("\n", f);
2268         }
2269
2270         if (c->pam_name)
2271                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2272
2273         if (strv_length(c->read_write_dirs) > 0) {
2274                 fprintf(f, "%sReadWriteDirs:", prefix);
2275                 strv_fprintf(f, c->read_write_dirs);
2276                 fputs("\n", f);
2277         }
2278
2279         if (strv_length(c->read_only_dirs) > 0) {
2280                 fprintf(f, "%sReadOnlyDirs:", prefix);
2281                 strv_fprintf(f, c->read_only_dirs);
2282                 fputs("\n", f);
2283         }
2284
2285         if (strv_length(c->inaccessible_dirs) > 0) {
2286                 fprintf(f, "%sInaccessibleDirs:", prefix);
2287                 strv_fprintf(f, c->inaccessible_dirs);
2288                 fputs("\n", f);
2289         }
2290
2291         if (c->utmp_id)
2292                 fprintf(f,
2293                         "%sUtmpIdentifier: %s\n",
2294                         prefix, c->utmp_id);
2295
2296         if (c->selinux_context)
2297                 fprintf(f,
2298                         "%sSELinuxContext: %s%s\n",
2299                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2300
2301         if (c->personality != 0xffffffffUL)
2302                 fprintf(f,
2303                         "%sPersonality: %s\n",
2304                         prefix, strna(personality_to_string(c->personality)));
2305
2306         if (c->syscall_filter) {
2307 #ifdef HAVE_SECCOMP
2308                 Iterator j;
2309                 void *id;
2310                 bool first = true;
2311 #endif
2312
2313                 fprintf(f,
2314                         "%sSystemCallFilter: ",
2315                         prefix);
2316
2317                 if (!c->syscall_whitelist)
2318                         fputc('~', f);
2319
2320 #ifdef HAVE_SECCOMP
2321                 SET_FOREACH(id, c->syscall_filter, j) {
2322                         _cleanup_free_ char *name = NULL;
2323
2324                         if (first)
2325                                 first = false;
2326                         else
2327                                 fputc(' ', f);
2328
2329                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2330                         fputs(strna(name), f);
2331                 }
2332 #endif
2333
2334                 fputc('\n', f);
2335         }
2336
2337         if (c->syscall_archs) {
2338 #ifdef HAVE_SECCOMP
2339                 Iterator j;
2340                 void *id;
2341 #endif
2342
2343                 fprintf(f,
2344                         "%sSystemCallArchitectures:",
2345                         prefix);
2346
2347 #ifdef HAVE_SECCOMP
2348                 SET_FOREACH(id, c->syscall_archs, j)
2349                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2350 #endif
2351                 fputc('\n', f);
2352         }
2353
2354         if (c->syscall_errno != 0)
2355                 fprintf(f,
2356                         "%sSystemCallErrorNumber: %s\n",
2357                         prefix, strna(errno_to_name(c->syscall_errno)));
2358
2359         if (c->apparmor_profile)
2360                 fprintf(f,
2361                         "%sAppArmorProfile: %s%s\n",
2362                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2363 }
2364
2365 void exec_status_start(ExecStatus *s, pid_t pid) {
2366         assert(s);
2367
2368         zero(*s);
2369         s->pid = pid;
2370         dual_timestamp_get(&s->start_timestamp);
2371 }
2372
2373 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2374         assert(s);
2375
2376         if (s->pid && s->pid != pid)
2377                 zero(*s);
2378
2379         s->pid = pid;
2380         dual_timestamp_get(&s->exit_timestamp);
2381
2382         s->code = code;
2383         s->status = status;
2384
2385         if (context) {
2386                 if (context->utmp_id)
2387                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2388
2389                 exec_context_tty_reset(context);
2390         }
2391 }
2392
2393 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2394         char buf[FORMAT_TIMESTAMP_MAX];
2395
2396         assert(s);
2397         assert(f);
2398
2399         if (s->pid <= 0)
2400                 return;
2401
2402         prefix = strempty(prefix);
2403
2404         fprintf(f,
2405                 "%sPID: "PID_FMT"\n",
2406                 prefix, s->pid);
2407
2408         if (s->start_timestamp.realtime > 0)
2409                 fprintf(f,
2410                         "%sStart Timestamp: %s\n",
2411                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2412
2413         if (s->exit_timestamp.realtime > 0)
2414                 fprintf(f,
2415                         "%sExit Timestamp: %s\n"
2416                         "%sExit Code: %s\n"
2417                         "%sExit Status: %i\n",
2418                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2419                         prefix, sigchld_code_to_string(s->code),
2420                         prefix, s->status);
2421 }
2422
/* Builds a single printable line from an argument vector: arguments
 * are separated by one space, and an argument containing whitespace
 * is wrapped in single quotes. Returns a newly allocated string the
 * caller has to free, or NULL if memory is exhausted. */
char *exec_command_line(char **argv) {
        char **arg, *out, *cursor;
        size_t need;

        assert(argv);

        /* Per argument: its text, one separator and two quotes; plus
         * one byte for the trailing NUL */
        need = 1;
        for (arg = argv; *arg; arg++)
                need += strlen(*arg) + 3;

        out = malloc(need);
        if (!out)
                return NULL;

        cursor = out;
        for (arg = argv; *arg; arg++) {
                const bool quote = strpbrk(*arg, WHITESPACE) != NULL;
                const size_t len = strlen(*arg);

                if (arg != argv)
                        *cursor++ = ' ';

                if (quote)
                        *cursor++ = '\'';

                memcpy(cursor, *arg, len);
                cursor += len;

                if (quote)
                        *cursor++ = '\'';
        }

        *cursor = 0;

        /* FIXME: this doesn't really handle arguments that have
         * spaces and ticks in them */

        return out;
}
2461
2462 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2463         _cleanup_free_ char *cmd = NULL;
2464         const char *prefix2;
2465
2466         assert(c);
2467         assert(f);
2468
2469         prefix = strempty(prefix);
2470         prefix2 = strappenda(prefix, "\t");
2471
2472         cmd = exec_command_line(c->argv);
2473         fprintf(f,
2474                 "%sCommand Line: %s\n",
2475                 prefix, cmd ? cmd : strerror(ENOMEM));
2476
2477         exec_status_dump(&c->exec_status, f, prefix2);
2478 }
2479
2480 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2481         assert(f);
2482
2483         prefix = strempty(prefix);
2484
2485         LIST_FOREACH(command, c, c)
2486                 exec_command_dump(c, f, prefix);
2487 }
2488
2489 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2490         ExecCommand *end;
2491
2492         assert(l);
2493         assert(e);
2494
2495         if (*l) {
2496                 /* It's kind of important, that we keep the order here */
2497                 LIST_FIND_TAIL(command, *l, end);
2498                 LIST_INSERT_AFTER(command, *l, end, e);
2499         } else
2500               *l = e;
2501 }
2502
2503 int exec_command_set(ExecCommand *c, const char *path, ...) {
2504         va_list ap;
2505         char **l, *p;
2506
2507         assert(c);
2508         assert(path);
2509
2510         va_start(ap, path);
2511         l = strv_new_ap(path, ap);
2512         va_end(ap);
2513
2514         if (!l)
2515                 return -ENOMEM;
2516
2517         p = strdup(path);
2518         if (!p) {
2519                 strv_free(l);
2520                 return -ENOMEM;
2521         }
2522
2523         free(c->path);
2524         c->path = p;
2525
2526         strv_free(c->argv);
2527         c->argv = l;
2528
2529         return 0;
2530 }
2531
2532 static int exec_runtime_allocate(ExecRuntime **rt) {
2533
2534         if (*rt)
2535                 return 0;
2536
2537         *rt = new0(ExecRuntime, 1);
2538         if (!*rt)
2539                 return -ENOMEM;
2540
2541         (*rt)->n_ref = 1;
2542         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2543
2544         return 0;
2545 }
2546
2547 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2548         int r;
2549
2550         assert(rt);
2551         assert(c);
2552         assert(id);
2553
2554         if (*rt)
2555                 return 1;
2556
2557         if (!c->private_network && !c->private_tmp)
2558                 return 0;
2559
2560         r = exec_runtime_allocate(rt);
2561         if (r < 0)
2562                 return r;
2563
2564         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2565                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2566                         return -errno;
2567         }
2568
2569         if (c->private_tmp && !(*rt)->tmp_dir) {
2570                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2571                 if (r < 0)
2572                         return r;
2573         }
2574
2575         return 1;
2576 }
2577
2578 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2579         assert(r);
2580         assert(r->n_ref > 0);
2581
2582         r->n_ref++;
2583         return r;
2584 }
2585
2586 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2587
2588         if (!r)
2589                 return NULL;
2590
2591         assert(r->n_ref > 0);
2592
2593         r->n_ref--;
2594         if (r->n_ref <= 0) {
2595                 free(r->tmp_dir);
2596                 free(r->var_tmp_dir);
2597                 safe_close_pair(r->netns_storage_socket);
2598                 free(r);
2599         }
2600
2601         return NULL;
2602 }
2603
2604 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2605         assert(u);
2606         assert(f);
2607         assert(fds);
2608
2609         if (!rt)
2610                 return 0;
2611
2612         if (rt->tmp_dir)
2613                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2614
2615         if (rt->var_tmp_dir)
2616                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2617
2618         if (rt->netns_storage_socket[0] >= 0) {
2619                 int copy;
2620
2621                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2622                 if (copy < 0)
2623                         return copy;
2624
2625                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2626         }
2627
2628         if (rt->netns_storage_socket[1] >= 0) {
2629                 int copy;
2630
2631                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2632                 if (copy < 0)
2633                         return copy;
2634
2635                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2636         }
2637
2638         return 0;
2639 }
2640
2641 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2642         int r;
2643
2644         assert(rt);
2645         assert(key);
2646         assert(value);
2647
2648         if (streq(key, "tmp-dir")) {
2649                 char *copy;
2650
2651                 r = exec_runtime_allocate(rt);
2652                 if (r < 0)
2653                         return r;
2654
2655                 copy = strdup(value);
2656                 if (!copy)
2657                         return log_oom();
2658
2659                 free((*rt)->tmp_dir);
2660                 (*rt)->tmp_dir = copy;
2661
2662         } else if (streq(key, "var-tmp-dir")) {
2663                 char *copy;
2664
2665                 r = exec_runtime_allocate(rt);
2666                 if (r < 0)
2667                         return r;
2668
2669                 copy = strdup(value);
2670                 if (!copy)
2671                         return log_oom();
2672
2673                 free((*rt)->var_tmp_dir);
2674                 (*rt)->var_tmp_dir = copy;
2675
2676         } else if (streq(key, "netns-socket-0")) {
2677                 int fd;
2678
2679                 r = exec_runtime_allocate(rt);
2680                 if (r < 0)
2681                         return r;
2682
2683                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2684                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2685                 else {
2686                         safe_close((*rt)->netns_storage_socket[0]);
2687                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2688                 }
2689         } else if (streq(key, "netns-socket-1")) {
2690                 int fd;
2691
2692                 r = exec_runtime_allocate(rt);
2693                 if (r < 0)
2694                         return r;
2695
2696                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2697                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2698                 else {
2699                         safe_close((*rt)->netns_storage_socket[1]);
2700                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2701                 }
2702         } else
2703                 return 0;
2704
2705         return 1;
2706 }
2707
2708 static void *remove_tmpdir_thread(void *p) {
2709         _cleanup_free_ char *path = p;
2710
2711         rm_rf_dangerous(path, false, true, false);
2712         return NULL;
2713 }
2714
2715 void exec_runtime_destroy(ExecRuntime *rt) {
2716         int r;
2717
2718         if (!rt)
2719                 return;
2720
2721         /* If there are multiple users of this, let's leave the stuff around */
2722         if (rt->n_ref > 1)
2723                 return;
2724
2725         if (rt->tmp_dir) {
2726                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2727
2728                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2729                 if (r < 0) {
2730                         log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2731                         free(rt->tmp_dir);
2732                 }
2733
2734                 rt->tmp_dir = NULL;
2735         }
2736
2737         if (rt->var_tmp_dir) {
2738                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2739
2740                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2741                 if (r < 0) {
2742                         log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2743                         free(rt->var_tmp_dir);
2744                 }
2745
2746                 rt->var_tmp_dir = NULL;
2747         }
2748
2749         safe_close_pair(rt->netns_storage_socket);
2750 }
2751
2752 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2753         [EXEC_INPUT_NULL] = "null",
2754         [EXEC_INPUT_TTY] = "tty",
2755         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2756         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2757         [EXEC_INPUT_SOCKET] = "socket"
2758 };
2759
2760 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2761
2762 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2763         [EXEC_OUTPUT_INHERIT] = "inherit",
2764         [EXEC_OUTPUT_NULL] = "null",
2765         [EXEC_OUTPUT_TTY] = "tty",
2766         [EXEC_OUTPUT_SYSLOG] = "syslog",
2767         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2768         [EXEC_OUTPUT_KMSG] = "kmsg",
2769         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2770         [EXEC_OUTPUT_JOURNAL] = "journal",
2771         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2772         [EXEC_OUTPUT_SOCKET] = "socket"
2773 };
2774
2775 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);