chiark / gitweb /
core: fix build without libseccomp
[elogind.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37 #include <sys/mount.h>
38 #include <linux/fs.h>
39 #include <linux/oom.h>
40 #include <sys/poll.h>
41 #include <glob.h>
42 #include <libgen.h>
43 #undef basename
44
45 #ifdef HAVE_PAM
46 #include <security/pam_appl.h>
47 #endif
48
49 #ifdef HAVE_SELINUX
50 #include <selinux/selinux.h>
51 #endif
52
53 #ifdef HAVE_SECCOMP
54 #include <seccomp.h>
55 #endif
56
57 #include "execute.h"
58 #include "strv.h"
59 #include "macro.h"
60 #include "capability.h"
61 #include "util.h"
62 #include "log.h"
63 #include "sd-messages.h"
64 #include "ioprio.h"
65 #include "securebits.h"
66 #include "namespace.h"
67 #include "tcpwrap.h"
68 #include "exit-status.h"
69 #include "missing.h"
70 #include "utmp-wtmp.h"
71 #include "def.h"
72 #include "path-util.h"
73 #include "env-util.h"
74 #include "fileio.h"
75 #include "unit.h"
76 #include "async.h"
77 #include "selinux-util.h"
78 #include "errno-list.h"
79
80 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
81 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
82
83 /* This assumes there is a 'tty' group */
84 #define TTY_MODE 0620
85
86 #define SNDBUF_SIZE (8*1024*1024)
87
/* Relocates the passed file descriptors so that fds[i] ends up being
 * descriptor number i+3. Modifies the fds array in place!
 *
 * Returns 0 on success (or when n_fds is 0) and a negative errno if
 * duplicating a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        while (first >= 0) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int want = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its slot */
                        if (fds[idx] == want)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, want);
                        if (dup_fd < 0)
                                return -errno;

                        close_nointr_nofail(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we wanted was still taken, so we got
                         * a different number back. Remember the first
                         * index where that happened and redo the pass
                         * from there. */
                        if (dup_fd != want && retry_at < 0)
                                retry_at = idx;
                }

                /* -1 means the whole pass went through cleanly */
                first = retry_at;
        }

        return 0;
}
131
/* Sets or clears O_NONBLOCK (according to 'nonblock') and clears
 * FD_CLOEXEC on each of the n_fds descriptors in fds.
 *
 * Returns 0 on success, or the first negative error returned by
 * fd_nonblock()/fd_cloexec(). */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (idx = 0; idx < n_fds; idx++) {
                int rc;

                rc = fd_nonblock(fds[idx], nonblock);
                if (rc < 0)
                        return rc;

                /* FD_CLOEXEC is always dropped: these fds are meant to
                 * be handed over to our children */
                rc = fd_cloexec(fds[idx], false);
                if (rc < 0)
                        return rc;
        }

        return 0;
}
158
159 _pure_ static const char *tty_path(const ExecContext *context) {
160         assert(context);
161
162         if (context->tty_path)
163                 return context->tty_path;
164
165         return "/dev/console";
166 }
167
168 static void exec_context_tty_reset(const ExecContext *context) {
169         assert(context);
170
171         if (context->tty_vhangup)
172                 terminal_vhangup(tty_path(context));
173
174         if (context->tty_reset)
175                 reset_terminal(tty_path(context));
176
177         if (context->tty_vt_disallocate && context->tty_path)
178                 vt_disallocate(context->tty_path);
179 }
180
181 static bool is_terminal_output(ExecOutput o) {
182         return
183                 o == EXEC_OUTPUT_TTY ||
184                 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
185                 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
186                 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
187 }
188
/* Opens /dev/null with the specified flags (plus O_NOCTTY) and makes it
 * available as file descriptor nfd.
 *
 * Returns nfd on success, a negative errno on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, ret;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* We were lucky and got the right number right away */
        if (null_fd == nfd)
                return nfd;

        if (dup2(null_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        close_nointr_nofail(null_fd);

        return ret;
}
206
207 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
208         int fd, r;
209         union sockaddr_union sa = {
210                 .un.sun_family = AF_UNIX,
211                 .un.sun_path = "/run/systemd/journal/stdout",
212         };
213
214         assert(context);
215         assert(output < _EXEC_OUTPUT_MAX);
216         assert(ident);
217         assert(nfd >= 0);
218
219         fd = socket(AF_UNIX, SOCK_STREAM, 0);
220         if (fd < 0)
221                 return -errno;
222
223         r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
224         if (r < 0) {
225                 close_nointr_nofail(fd);
226                 return -errno;
227         }
228
229         if (shutdown(fd, SHUT_RD) < 0) {
230                 close_nointr_nofail(fd);
231                 return -errno;
232         }
233
234         fd_inc_sndbuf(fd, SNDBUF_SIZE);
235
236         dprintf(fd,
237                 "%s\n"
238                 "%s\n"
239                 "%i\n"
240                 "%i\n"
241                 "%i\n"
242                 "%i\n"
243                 "%i\n",
244                 context->syslog_identifier ? context->syslog_identifier : ident,
245                 unit_id,
246                 context->syslog_priority,
247                 !!context->syslog_level_prefix,
248                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
249                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
250                 is_terminal_output(output));
251
252         if (fd != nfd) {
253                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
254                 close_nointr_nofail(fd);
255         } else
256                 r = nfd;
257
258         return r;
259 }
/* Opens the terminal at 'path' with the given open flags (plus
 * O_NOCTTY) and makes it available as file descriptor nfd.
 *
 * Returns nfd on success, a negative error on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, ret;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already at the right number? Then we are done. */
        if (tty_fd == nfd)
                return nfd;

        if (dup2(tty_fd, nfd) < 0)
                ret = -errno;
        else
                ret = nfd;

        close_nointr_nofail(tty_fd);

        return ret;
}
277
278 static bool is_terminal_input(ExecInput i) {
279         return
280                 i == EXEC_INPUT_TTY ||
281                 i == EXEC_INPUT_TTY_FORCE ||
282                 i == EXEC_INPUT_TTY_FAIL;
283 }
284
285 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
286
287         if (is_terminal_input(std_input) && !apply_tty_stdin)
288                 return EXEC_INPUT_NULL;
289
290         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
291                 return EXEC_INPUT_NULL;
292
293         return std_input;
294 }
295
296 static int fixup_output(ExecOutput std_output, int socket_fd) {
297
298         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
299                 return EXEC_OUTPUT_INHERIT;
300
301         return std_output;
302 }
303
304 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
305         ExecInput i;
306
307         assert(context);
308
309         i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
310
311         switch (i) {
312
313         case EXEC_INPUT_NULL:
314                 return open_null_as(O_RDONLY, STDIN_FILENO);
315
316         case EXEC_INPUT_TTY:
317         case EXEC_INPUT_TTY_FORCE:
318         case EXEC_INPUT_TTY_FAIL: {
319                 int fd, r;
320
321                 fd = acquire_terminal(tty_path(context),
322                                       i == EXEC_INPUT_TTY_FAIL,
323                                       i == EXEC_INPUT_TTY_FORCE,
324                                       false,
325                                       (usec_t) -1);
326                 if (fd < 0)
327                         return fd;
328
329                 if (fd != STDIN_FILENO) {
330                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
331                         close_nointr_nofail(fd);
332                 } else
333                         r = STDIN_FILENO;
334
335                 return r;
336         }
337
338         case EXEC_INPUT_SOCKET:
339                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
340
341         default:
342                 assert_not_reached("Unknown input type");
343         }
344 }
345
/* Sets up the file descriptor 'fileno' according to the output
 * configuration of the context. The configured stdout mode is used,
 * unless fileno is STDERR_FILENO, in which case the stderr mode is
 * looked at as well.
 *
 * The input and output modes are first passed through fixup_input()
 * and fixup_output(), so that modes that cannot be honoured (no socket
 * fd, TTY input not applied) are replaced by their fallbacks.
 *
 * Every return path yields either 'fileno' itself, the result of one of
 * the open_*_as() helpers, or a negative errno. */
static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
        ExecOutput o;
        ExecInput i;
        int r;

        assert(context);
        assert(ident);

        i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
        o = fixup_output(context->std_output, socket_fd);

        if (fileno == STDERR_FILENO) {
                ExecOutput e;
                e = fixup_output(context->std_error, socket_fd);

                /* This expects the input and output are already set up */

                /* Don't change the stderr file descriptor if we inherit all
                 * the way and are not on a tty */
                if (e == EXEC_OUTPUT_INHERIT &&
                    o == EXEC_OUTPUT_INHERIT &&
                    i == EXEC_INPUT_NULL &&
                    !is_terminal_input(context->std_input) &&
                    getppid () != 1)
                        return fileno;

                /* Duplicate from stdout if possible */
                if (e == o || e == EXEC_OUTPUT_INHERIT)
                        return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;

                /* From here on, the stderr mode is the one handled by
                 * the switch below */
                o = e;

        } else if (o == EXEC_OUTPUT_INHERIT) {
                /* If input got downgraded, inherit the original value */
                if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
                        return open_terminal_as(tty_path(context), O_WRONLY, fileno);

                /* If the input is connected to anything that's not a /dev/null, inherit that... */
                if (i != EXEC_INPUT_NULL)
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
                if (getppid() != 1)
                        return fileno;

                /* We need to open /dev/null here anew, to get the right access mode. */
                return open_null_as(O_WRONLY, fileno);
        }

        switch (o) {

        case EXEC_OUTPUT_NULL:
                return open_null_as(O_WRONLY, fileno);

        case EXEC_OUTPUT_TTY:
                /* If stdin is a terminal already, simply share it */
                if (is_terminal_input(i))
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* We don't reset the terminal if this is just about output */
                return open_terminal_as(tty_path(context), O_WRONLY, fileno);

        case EXEC_OUTPUT_SYSLOG:
        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
        case EXEC_OUTPUT_KMSG:
        case EXEC_OUTPUT_KMSG_AND_CONSOLE:
        case EXEC_OUTPUT_JOURNAL:
        case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
                r = connect_logger_as(context, o, ident, unit_id, fileno);
                if (r < 0) {
                        /* Failing to reach the journal socket is logged,
                         * and the fd is then connected to /dev/null
                         * instead */
                        log_struct_unit(LOG_CRIT, unit_id,
                                "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
                                fileno == STDOUT_FILENO ? "out" : "err",
                                unit_id, strerror(-r),
                                "ERRNO=%d", -r,
                                NULL);
                        r = open_null_as(O_WRONLY, fileno);
                }
                return r;

        case EXEC_OUTPUT_SOCKET:
                /* fixup_output() turned this mode into INHERIT if there is no socket fd */
                assert(socket_fd >= 0);
                return dup2(socket_fd, fileno) < 0 ? -errno : fileno;

        default:
                assert_not_reached("Unknown error type");
        }
}
433
434 static int chown_terminal(int fd, uid_t uid) {
435         struct stat st;
436
437         assert(fd >= 0);
438
439         /* This might fail. What matters are the results. */
440         (void) fchown(fd, uid, -1);
441         (void) fchmod(fd, TTY_MODE);
442
443         if (fstat(fd, &st) < 0)
444                 return -errno;
445
446         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
447                 return -EPERM;
448
449         return 0;
450 }
451
452 static int setup_confirm_stdio(int *_saved_stdin,
453                                int *_saved_stdout) {
454         int fd = -1, saved_stdin, saved_stdout = -1, r;
455
456         assert(_saved_stdin);
457         assert(_saved_stdout);
458
459         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460         if (saved_stdin < 0)
461                 return -errno;
462
463         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
464         if (saved_stdout < 0) {
465                 r = errno;
466                 goto fail;
467         }
468
469         fd = acquire_terminal(
470                         "/dev/console",
471                         false,
472                         false,
473                         false,
474                         DEFAULT_CONFIRM_USEC);
475         if (fd < 0) {
476                 r = fd;
477                 goto fail;
478         }
479
480         r = chown_terminal(fd, getuid());
481         if (r < 0)
482                 goto fail;
483
484         if (dup2(fd, STDIN_FILENO) < 0) {
485                 r = -errno;
486                 goto fail;
487         }
488
489         if (dup2(fd, STDOUT_FILENO) < 0) {
490                 r = -errno;
491                 goto fail;
492         }
493
494         if (fd >= 2)
495                 close_nointr_nofail(fd);
496
497         *_saved_stdin = saved_stdin;
498         *_saved_stdout = saved_stdout;
499
500         return 0;
501
502 fail:
503         if (saved_stdout >= 0)
504                 close_nointr_nofail(saved_stdout);
505
506         if (saved_stdin >= 0)
507                 close_nointr_nofail(saved_stdin);
508
509         if (fd >= 0)
510                 close_nointr_nofail(fd);
511
512         return r;
513 }
514
515 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
516         int fd;
517         va_list ap;
518
519         assert(format);
520
521         fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522         if (fd < 0)
523                 return fd;
524
525         va_start(ap, format);
526         vdprintf(fd, format, ap);
527         va_end(ap);
528
529         close_nointr_nofail(fd);
530
531         return 0;
532 }
533
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors back in place as stdin/stdout and closes the saved
 * copies. Saved descriptors that are negative are skipped.
 *
 * Returns 0 on success, or the negative errno of the last dup2() that
 * failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        bool have_stdin, have_stdout;
        int ret = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        have_stdin = *saved_stdin >= 0;
        have_stdout = *saved_stdout >= 0;

        /* First put both back in place ... */
        if (have_stdin && dup2(*saved_stdin, STDIN_FILENO) < 0)
                ret = -errno;

        if (have_stdout && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                ret = -errno;

        /* ... then get rid of the copies */
        if (have_stdin)
                close_nointr_nofail(*saved_stdin);

        if (have_stdout)
                close_nointr_nofail(*saved_stdout);

        return ret;
}
560
/* Asks on the console whether the command line argv shall be executed.
 * The answer (one of the characters "yns") is stored in *response.
 *
 * stdin/stdout are redirected to the console for the duration of the
 * question and restored on every path once the redirection succeeded.
 *
 * Returns the result of ask(), or a negative error if the console
 * could not be set up or memory ran out. */
static int ask_for_confirmation(char *response, char **argv) {
        int saved_stdout = -1, saved_stdin = -1, r;
        char *line;

        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
        if (r < 0)
                return r;

        line = exec_command_line(argv);
        if (!line) {
                /* Don't leave stdio pointing at the console, and don't
                 * leak the saved descriptors */
                restore_confirm_stdio(&saved_stdin, &saved_stdout);
                return -ENOMEM;
        }

        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
        free(line);

        restore_confirm_stdio(&saved_stdin, &saved_stdout);

        return r;
}
580
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582         bool keep_groups = false;
583         int r;
584
585         assert(context);
586
587         /* Lookup and set GID and supplementary group list. Here too
588          * we avoid NSS lookups for gid=0. */
589
590         if (context->group || username) {
591
592                 if (context->group) {
593                         const char *g = context->group;
594
595                         if ((r = get_group_creds(&g, &gid)) < 0)
596                                 return r;
597                 }
598
599                 /* First step, initialize groups from /etc/groups */
600                 if (username && gid != 0) {
601                         if (initgroups(username, gid) < 0)
602                                 return -errno;
603
604                         keep_groups = true;
605                 }
606
607                 /* Second step, set our gids */
608                 if (setresgid(gid, gid, gid) < 0)
609                         return -errno;
610         }
611
612         if (context->supplementary_groups) {
613                 int ngroups_max, k;
614                 gid_t *gids;
615                 char **i;
616
617                 /* Final step, initialize any manually set supplementary groups */
618                 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619
620                 if (!(gids = new(gid_t, ngroups_max)))
621                         return -ENOMEM;
622
623                 if (keep_groups) {
624                         if ((k = getgroups(ngroups_max, gids)) < 0) {
625                                 free(gids);
626                                 return -errno;
627                         }
628                 } else
629                         k = 0;
630
631                 STRV_FOREACH(i, context->supplementary_groups) {
632                         const char *g;
633
634                         if (k >= ngroups_max) {
635                                 free(gids);
636                                 return -E2BIG;
637                         }
638
639                         g = *i;
640                         r = get_group_creds(&g, gids+k);
641                         if (r < 0) {
642                                 free(gids);
643                                 return r;
644                         }
645
646                         k++;
647                 }
648
649                 if (setgroups(k, gids) < 0) {
650                         free(gids);
651                         return -errno;
652                 }
653
654                 free(gids);
655         }
656
657         return 0;
658 }
659
660 static int enforce_user(const ExecContext *context, uid_t uid) {
661         assert(context);
662
663         /* Sets (but doesn't lookup) the uid and make sure we keep the
664          * capabilities while doing so. */
665
666         if (context->capabilities) {
667                 _cleanup_cap_free_ cap_t d = NULL;
668                 static const cap_value_t bits[] = {
669                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
670                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
671                 };
672
673                 /* First step: If we need to keep capabilities but
674                  * drop privileges we need to make sure we keep our
675                  * caps, while we drop privileges. */
676                 if (uid != 0) {
677                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678
679                         if (prctl(PR_GET_SECUREBITS) != sb)
680                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681                                         return -errno;
682                 }
683
684                 /* Second step: set the capabilities. This will reduce
685                  * the capabilities to the minimum we need. */
686
687                 d = cap_dup(context->capabilities);
688                 if (!d)
689                         return -errno;
690
691                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
693                         return -errno;
694
695                 if (cap_set_proc(d) < 0)
696                         return -errno;
697         }
698
699         /* Third step: actually set the uids */
700         if (setresuid(uid, uid, uid) < 0)
701                 return -errno;
702
703         /* At this point we should have all necessary capabilities but
704            are otherwise a normal user. However, the caps might got
705            corrupted due to the setresuid() so we need clean them up
706            later. This is done outside of this call. */
707
708         return 0;
709 }
710
711 #ifdef HAVE_PAM
712
713 static int null_conv(
714                 int num_msg,
715                 const struct pam_message **msg,
716                 struct pam_response **resp,
717                 void *appdata_ptr) {
718
719         /* We don't support conversations */
720
721         return PAM_CONV_ERR;
722 }
723
724 static int setup_pam(
725                 const char *name,
726                 const char *user,
727                 uid_t uid,
728                 const char *tty,
729                 char ***pam_env,
730                 int fds[], unsigned n_fds) {
731
732         static const struct pam_conv conv = {
733                 .conv = null_conv,
734                 .appdata_ptr = NULL
735         };
736
737         pam_handle_t *handle = NULL;
738         sigset_t ss, old_ss;
739         int pam_code = PAM_SUCCESS;
740         int err;
741         char **e = NULL;
742         bool close_session = false;
743         pid_t pam_pid = 0, parent_pid;
744         int flags = 0;
745
746         assert(name);
747         assert(user);
748         assert(pam_env);
749
750         /* We set up PAM in the parent process, then fork. The child
751          * will then stay around until killed via PR_GET_PDEATHSIG or
752          * systemd via the cgroup logic. It will then remove the PAM
753          * session again. The parent process will exec() the actual
754          * daemon. We do things this way to ensure that the main PID
755          * of the daemon is the one we initially fork()ed. */
756
757         if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
758                 flags |= PAM_SILENT;
759
760         pam_code = pam_start(name, user, &conv, &handle);
761         if (pam_code != PAM_SUCCESS) {
762                 handle = NULL;
763                 goto fail;
764         }
765
766         if (tty) {
767                 pam_code = pam_set_item(handle, PAM_TTY, tty);
768                 if (pam_code != PAM_SUCCESS)
769                         goto fail;
770         }
771
772         pam_code = pam_acct_mgmt(handle, flags);
773         if (pam_code != PAM_SUCCESS)
774                 goto fail;
775
776         pam_code = pam_open_session(handle, flags);
777         if (pam_code != PAM_SUCCESS)
778                 goto fail;
779
780         close_session = true;
781
782         e = pam_getenvlist(handle);
783         if (!e) {
784                 pam_code = PAM_BUF_ERR;
785                 goto fail;
786         }
787
788         /* Block SIGTERM, so that we know that it won't get lost in
789          * the child */
790         if (sigemptyset(&ss) < 0 ||
791             sigaddset(&ss, SIGTERM) < 0 ||
792             sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793                 goto fail;
794
795         parent_pid = getpid();
796
797         pam_pid = fork();
798         if (pam_pid < 0)
799                 goto fail;
800
801         if (pam_pid == 0) {
802                 int sig;
803                 int r = EXIT_PAM;
804
805                 /* The child's job is to reset the PAM session on
806                  * termination */
807
808                 /* This string must fit in 10 chars (i.e. the length
809                  * of "/sbin/init"), to look pretty in /bin/ps */
810                 rename_process("(sd-pam)");
811
812                 /* Make sure we don't keep open the passed fds in this
813                 child. We assume that otherwise only those fds are
814                 open here that have been opened by PAM. */
815                 close_many(fds, n_fds);
816
817                 /* Drop privileges - we don't need any to pam_close_session
818                  * and this will make PR_SET_PDEATHSIG work in most cases.
819                  * If this fails, ignore the error - but expect sd-pam threads
820                  * to fail to exit normally */
821                 if (setresuid(uid, uid, uid) < 0)
822                         log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
823
824                 /* Wait until our parent died. This will only work if
825                  * the above setresuid() succeeds, otherwise the kernel
826                  * will not allow unprivileged parents kill their privileged
827                  * children this way. We rely on the control groups kill logic
828                  * to do the rest for us. */
829                 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830                         goto child_finish;
831
832                 /* Check if our parent process might already have
833                  * died? */
834                 if (getppid() == parent_pid) {
835                         for (;;) {
836                                 if (sigwait(&ss, &sig) < 0) {
837                                         if (errno == EINTR)
838                                                 continue;
839
840                                         goto child_finish;
841                                 }
842
843                                 assert(sig == SIGTERM);
844                                 break;
845                         }
846                 }
847
848                 /* If our parent died we'll end the session */
849                 if (getppid() != parent_pid) {
850                         pam_code = pam_close_session(handle, flags);
851                         if (pam_code != PAM_SUCCESS)
852                                 goto child_finish;
853                 }
854
855                 r = 0;
856
857         child_finish:
858                 pam_end(handle, pam_code | flags);
859                 _exit(r);
860         }
861
862         /* If the child was forked off successfully it will do all the
863          * cleanups, so forget about the handle here. */
864         handle = NULL;
865
866         /* Unblock SIGTERM again in the parent */
867         if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
868                 goto fail;
869
870         /* We close the log explicitly here, since the PAM modules
871          * might have opened it, but we don't want this fd around. */
872         closelog();
873
874         *pam_env = e;
875         e = NULL;
876
877         return 0;
878
879 fail:
880         if (pam_code != PAM_SUCCESS) {
881                 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882                 err = -EPERM;  /* PAM errors do not map to errno */
883         } else {
884                 log_error("PAM failed: %m");
885                 err = -errno;
886         }
887
888         if (handle) {
889                 if (close_session)
890                         pam_code = pam_close_session(handle, flags);
891
892                 pam_end(handle, pam_code | flags);
893         }
894
895         strv_free(e);
896
897         closelog();
898
899         if (pam_pid > 1) {
900                 kill(pam_pid, SIGTERM);
901                 kill(pam_pid, SIGCONT);
902         }
903
904         return err;
905 }
906 #endif
907
/* Renames the calling process to the file name part of 'path', wrapped
 * in parentheses. Only the last 8 characters of the name are used, so
 * that the result never exceeds 10 characters (i.e. the length of
 * "/sbin/init") and looks pretty in /bin/ps. If there is no file name
 * part, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char name[11];
        const char *base;
        size_t len;

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        if (len > 8) {
                /* Keep the tail rather than the head: the beginning is
                 * often just a common prefix such as "systemd-" */
                base += len - 8;
                len = 8;
        }

        name[0] = '(';
        memcpy(name + 1, base, len);
        name[len + 1] = ')';
        name[len + 2] = 0;

        rename_process(name);
}
938
939 #ifdef HAVE_SECCOMP
940
941 static int apply_seccomp(ExecContext *c) {
942         uint32_t negative_action, action;
943         scmp_filter_ctx *seccomp;
944         Iterator i;
945         void *id;
946         int r;
947
948         assert(c);
949
950         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
951
952         seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
953         if (!seccomp)
954                 return -ENOMEM;
955
956         action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
957
958         SET_FOREACH(id, c->syscall_filter, i) {
959                 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
960                 if (r < 0) {
961                         seccomp_release(seccomp);
962                         return r;
963                 }
964         }
965
966         r = seccomp_load(seccomp);
967         seccomp_release(seccomp);
968
969         return r;
970 }
971 #endif
972
973 static void do_idle_pipe_dance(int idle_pipe[4]) {
974         assert(idle_pipe);
975
976         if (idle_pipe[1] >= 0)
977                 close_nointr_nofail(idle_pipe[1]);
978         if (idle_pipe[2] >= 0)
979                 close_nointr_nofail(idle_pipe[2]);
980
981         if (idle_pipe[0] >= 0) {
982                 int r;
983
984                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
985
986                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
987                         /* Signal systemd that we are bored and want to continue. */
988                         write(idle_pipe[3], "x", 1);
989
990                         /* Wait for systemd to react to the signal above. */
991                         fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
992                 }
993
994                 close_nointr_nofail(idle_pipe[0]);
995
996         }
997
998         if (idle_pipe[3] >= 0)
999                 close_nointr_nofail(idle_pipe[3]);
1000 }
1001
1002 static int build_environment(
1003                 ExecContext *c,
1004                 unsigned n_fds,
1005                 usec_t watchdog_usec,
1006                 const char *home,
1007                 const char *username,
1008                 const char *shell,
1009                 char ***ret) {
1010
1011         _cleanup_strv_free_ char **our_env = NULL;
1012         unsigned n_env = 0;
1013         char *x;
1014
1015         assert(c);
1016         assert(ret);
1017
1018         our_env = new0(char*, 10);
1019         if (!our_env)
1020                 return -ENOMEM;
1021
1022         if (n_fds > 0) {
1023                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1024                         return -ENOMEM;
1025                 our_env[n_env++] = x;
1026
1027                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1028                         return -ENOMEM;
1029                 our_env[n_env++] = x;
1030         }
1031
1032         if (watchdog_usec > 0) {
1033                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1034                         return -ENOMEM;
1035                 our_env[n_env++] = x;
1036
1037                 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1038                         return -ENOMEM;
1039                 our_env[n_env++] = x;
1040         }
1041
1042         if (home) {
1043                 x = strappend("HOME=", home);
1044                 if (!x)
1045                         return -ENOMEM;
1046                 our_env[n_env++] = x;
1047         }
1048
1049         if (username) {
1050                 x = strappend("LOGNAME=", username);
1051                 if (!x)
1052                         return -ENOMEM;
1053                 our_env[n_env++] = x;
1054
1055                 x = strappend("USER=", username);
1056                 if (!x)
1057                         return -ENOMEM;
1058                 our_env[n_env++] = x;
1059         }
1060
1061         if (shell) {
1062                 x = strappend("SHELL=", shell);
1063                 if (!x)
1064                         return -ENOMEM;
1065                 our_env[n_env++] = x;
1066         }
1067
1068         if (is_terminal_input(c->std_input) ||
1069             c->std_output == EXEC_OUTPUT_TTY ||
1070             c->std_error == EXEC_OUTPUT_TTY ||
1071             c->tty_path) {
1072
1073                 x = strdup(default_term_for_tty(tty_path(c)));
1074                 if (!x)
1075                         return -ENOMEM;
1076                 our_env[n_env++] = x;
1077         }
1078
1079         our_env[n_env++] = NULL;
1080         assert(n_env <= 10);
1081
1082         *ret = our_env;
1083         our_env = NULL;
1084
1085         return 0;
1086 }
1087
1088 int exec_spawn(ExecCommand *command,
1089                char **argv,
1090                ExecContext *context,
1091                int fds[], unsigned n_fds,
1092                char **environment,
1093                bool apply_permissions,
1094                bool apply_chroot,
1095                bool apply_tty_stdin,
1096                bool confirm_spawn,
1097                CGroupControllerMask cgroup_supported,
1098                const char *cgroup_path,
1099                const char *unit_id,
1100                usec_t watchdog_usec,
1101                int idle_pipe[4],
1102                ExecRuntime *runtime,
1103                pid_t *ret) {
1104
1105         _cleanup_strv_free_ char **files_env = NULL;
1106         int socket_fd;
1107         char *line;
1108         pid_t pid;
1109         int r;
1110
1111         assert(command);
1112         assert(context);
1113         assert(ret);
1114         assert(fds || n_fds <= 0);
1115
1116         if (context->std_input == EXEC_INPUT_SOCKET ||
1117             context->std_output == EXEC_OUTPUT_SOCKET ||
1118             context->std_error == EXEC_OUTPUT_SOCKET) {
1119
1120                 if (n_fds != 1)
1121                         return -EINVAL;
1122
1123                 socket_fd = fds[0];
1124
1125                 fds = NULL;
1126                 n_fds = 0;
1127         } else
1128                 socket_fd = -1;
1129
1130         r = exec_context_load_environment(context, &files_env);
1131         if (r < 0) {
1132                 log_struct_unit(LOG_ERR,
1133                            unit_id,
1134                            "MESSAGE=Failed to load environment files: %s", strerror(-r),
1135                            "ERRNO=%d", -r,
1136                            NULL);
1137                 return r;
1138         }
1139
1140         if (!argv)
1141                 argv = command->argv;
1142
1143         line = exec_command_line(argv);
1144         if (!line)
1145                 return log_oom();
1146
1147         log_struct_unit(LOG_DEBUG,
1148                         unit_id,
1149                         "EXECUTABLE=%s", command->path,
1150                         "MESSAGE=About to execute: %s", line,
1151                         NULL);
1152         free(line);
1153
1154         pid = fork();
1155         if (pid < 0)
1156                 return -errno;
1157
1158         if (pid == 0) {
1159                 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1160                 const char *username = NULL, *home = NULL, *shell = NULL;
1161                 unsigned n_dont_close = 0;
1162                 int dont_close[n_fds + 3];
1163                 uid_t uid = (uid_t) -1;
1164                 gid_t gid = (gid_t) -1;
1165                 sigset_t ss;
1166                 int i, err;
1167
1168                 /* child */
1169
1170                 rename_process_from_path(command->path);
1171
1172                 /* We reset exactly these signals, since they are the
1173                  * only ones we set to SIG_IGN in the main daemon. All
1174                  * others we leave untouched because we set them to
1175                  * SIG_DFL or a valid handler initially, both of which
1176                  * will be demoted to SIG_DFL. */
1177                 default_signals(SIGNALS_CRASH_HANDLER,
1178                                 SIGNALS_IGNORE, -1);
1179
1180                 if (context->ignore_sigpipe)
1181                         ignore_signals(SIGPIPE, -1);
1182
1183                 assert_se(sigemptyset(&ss) == 0);
1184                 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1185                         err = -errno;
1186                         r = EXIT_SIGNAL_MASK;
1187                         goto fail_child;
1188                 }
1189
1190                 if (idle_pipe)
1191                         do_idle_pipe_dance(idle_pipe);
1192
1193                 /* Close sockets very early to make sure we don't
1194                  * block init reexecution because it cannot bind its
1195                  * sockets */
1196                 log_forget_fds();
1197
1198                 if (socket_fd >= 0)
1199                         dont_close[n_dont_close++] = socket_fd;
1200                 if (n_fds > 0) {
1201                         memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1202                         n_dont_close += n_fds;
1203                 }
1204                 if (runtime) {
1205                         if (runtime->netns_storage_socket[0] >= 0)
1206                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1207                         if (runtime->netns_storage_socket[1] >= 0)
1208                                 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1209                 }
1210
1211                 err = close_all_fds(dont_close, n_dont_close);
1212                 if (err < 0) {
1213                         r = EXIT_FDS;
1214                         goto fail_child;
1215                 }
1216
1217                 if (!context->same_pgrp)
1218                         if (setsid() < 0) {
1219                                 err = -errno;
1220                                 r = EXIT_SETSID;
1221                                 goto fail_child;
1222                         }
1223
1224                 if (context->tcpwrap_name) {
1225                         if (socket_fd >= 0)
1226                                 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1227                                         err = -EACCES;
1228                                         r = EXIT_TCPWRAP;
1229                                         goto fail_child;
1230                                 }
1231
1232                         for (i = 0; i < (int) n_fds; i++) {
1233                                 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1234                                         err = -EACCES;
1235                                         r = EXIT_TCPWRAP;
1236                                         goto fail_child;
1237                                 }
1238                         }
1239                 }
1240
1241                 exec_context_tty_reset(context);
1242
1243                 if (confirm_spawn) {
1244                         char response;
1245
1246                         err = ask_for_confirmation(&response, argv);
1247                         if (err == -ETIMEDOUT)
1248                                 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1249                         else if (err < 0)
1250                                 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1251                         else if (response == 's') {
1252                                 write_confirm_message("Skipping execution.\n");
1253                                 err = -ECANCELED;
1254                                 r = EXIT_CONFIRM;
1255                                 goto fail_child;
1256                         } else if (response == 'n') {
1257                                 write_confirm_message("Failing execution.\n");
1258                                 err = r = 0;
1259                                 goto fail_child;
1260                         }
1261                 }
1262
1263                 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1264                  * must sure to drop O_NONBLOCK */
1265                 if (socket_fd >= 0)
1266                         fd_nonblock(socket_fd, false);
1267
1268                 err = setup_input(context, socket_fd, apply_tty_stdin);
1269                 if (err < 0) {
1270                         r = EXIT_STDIN;
1271                         goto fail_child;
1272                 }
1273
1274                 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1275                 if (err < 0) {
1276                         r = EXIT_STDOUT;
1277                         goto fail_child;
1278                 }
1279
1280                 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1281                 if (err < 0) {
1282                         r = EXIT_STDERR;
1283                         goto fail_child;
1284                 }
1285
1286                 if (cgroup_path) {
1287                         err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1288                         if (err < 0) {
1289                                 r = EXIT_CGROUP;
1290                                 goto fail_child;
1291                         }
1292                 }
1293
1294                 if (context->oom_score_adjust_set) {
1295                         char t[16];
1296
1297                         snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1298                         char_array_0(t);
1299
1300                         if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1301                                 err = -errno;
1302                                 r = EXIT_OOM_ADJUST;
1303                                 goto fail_child;
1304                         }
1305                 }
1306
1307                 if (context->nice_set)
1308                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1309                                 err = -errno;
1310                                 r = EXIT_NICE;
1311                                 goto fail_child;
1312                         }
1313
1314                 if (context->cpu_sched_set) {
1315                         struct sched_param param = {
1316                                 .sched_priority = context->cpu_sched_priority,
1317                         };
1318
1319                         r = sched_setscheduler(0,
1320                                                context->cpu_sched_policy |
1321                                                (context->cpu_sched_reset_on_fork ?
1322                                                 SCHED_RESET_ON_FORK : 0),
1323                                                &param);
1324                         if (r < 0) {
1325                                 err = -errno;
1326                                 r = EXIT_SETSCHEDULER;
1327                                 goto fail_child;
1328                         }
1329                 }
1330
1331                 if (context->cpuset)
1332                         if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1333                                 err = -errno;
1334                                 r = EXIT_CPUAFFINITY;
1335                                 goto fail_child;
1336                         }
1337
1338                 if (context->ioprio_set)
1339                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1340                                 err = -errno;
1341                                 r = EXIT_IOPRIO;
1342                                 goto fail_child;
1343                         }
1344
1345                 if (context->timer_slack_nsec != (nsec_t) -1)
1346                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1347                                 err = -errno;
1348                                 r = EXIT_TIMERSLACK;
1349                                 goto fail_child;
1350                         }
1351
1352                 if (context->utmp_id)
1353                         utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1354
1355                 if (context->user) {
1356                         username = context->user;
1357                         err = get_user_creds(&username, &uid, &gid, &home, &shell);
1358                         if (err < 0) {
1359                                 r = EXIT_USER;
1360                                 goto fail_child;
1361                         }
1362
1363                         if (is_terminal_input(context->std_input)) {
1364                                 err = chown_terminal(STDIN_FILENO, uid);
1365                                 if (err < 0) {
1366                                         r = EXIT_STDIN;
1367                                         goto fail_child;
1368                                 }
1369                         }
1370                 }
1371
1372 #ifdef HAVE_PAM
1373                 if (cgroup_path && context->user && context->pam_name) {
1374                         err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1375                         if (err < 0) {
1376                                 r = EXIT_CGROUP;
1377                                 goto fail_child;
1378                         }
1379
1380
1381                         err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1382                         if (err < 0) {
1383                                 r = EXIT_CGROUP;
1384                                 goto fail_child;
1385                         }
1386                 }
1387 #endif
1388
1389                 if (apply_permissions) {
1390                         err = enforce_groups(context, username, gid);
1391                         if (err < 0) {
1392                                 r = EXIT_GROUP;
1393                                 goto fail_child;
1394                         }
1395                 }
1396
1397                 umask(context->umask);
1398
1399 #ifdef HAVE_PAM
1400                 if (apply_permissions && context->pam_name && username) {
1401                         err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1402                         if (err < 0) {
1403                                 r = EXIT_PAM;
1404                                 goto fail_child;
1405                         }
1406                 }
1407 #endif
1408                 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1409                         err = setup_netns(runtime->netns_storage_socket);
1410                         if (err < 0) {
1411                                 r = EXIT_NETWORK;
1412                                 goto fail_child;
1413                         }
1414                 }
1415
1416                 if (!strv_isempty(context->read_write_dirs) ||
1417                     !strv_isempty(context->read_only_dirs) ||
1418                     !strv_isempty(context->inaccessible_dirs) ||
1419                     context->mount_flags != 0 ||
1420                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1421                     context->private_devices) {
1422
1423                         char *tmp = NULL, *var = NULL;
1424
1425                         /* The runtime struct only contains the parent
1426                          * of the private /tmp, which is
1427                          * non-accessible to world users. Inside of it
1428                          * there's a /tmp that is sticky, and that's
1429                          * the one we want to use here. */
1430
1431                         if (context->private_tmp && runtime) {
1432                                 if (runtime->tmp_dir)
1433                                         tmp = strappenda(runtime->tmp_dir, "/tmp");
1434                                 if (runtime->var_tmp_dir)
1435                                         var = strappenda(runtime->var_tmp_dir, "/tmp");
1436                         }
1437
1438                         err = setup_namespace(
1439                                         context->read_write_dirs,
1440                                         context->read_only_dirs,
1441                                         context->inaccessible_dirs,
1442                                         tmp,
1443                                         var,
1444                                         context->private_devices,
1445                                         context->mount_flags);
1446
1447                         if (err < 0) {
1448                                 r = EXIT_NAMESPACE;
1449                                 goto fail_child;
1450                         }
1451                 }
1452
1453                 if (apply_chroot) {
1454                         if (context->root_directory)
1455                                 if (chroot(context->root_directory) < 0) {
1456                                         err = -errno;
1457                                         r = EXIT_CHROOT;
1458                                         goto fail_child;
1459                                 }
1460
1461                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1462                                 err = -errno;
1463                                 r = EXIT_CHDIR;
1464                                 goto fail_child;
1465                         }
1466                 } else {
1467                         _cleanup_free_ char *d = NULL;
1468
1469                         if (asprintf(&d, "%s/%s",
1470                                      context->root_directory ? context->root_directory : "",
1471                                      context->working_directory ? context->working_directory : "") < 0) {
1472                                 err = -ENOMEM;
1473                                 r = EXIT_MEMORY;
1474                                 goto fail_child;
1475                         }
1476
1477                         if (chdir(d) < 0) {
1478                                 err = -errno;
1479                                 r = EXIT_CHDIR;
1480                                 goto fail_child;
1481                         }
1482                 }
1483
1484                 /* We repeat the fd closing here, to make sure that
1485                  * nothing is leaked from the PAM modules */
1486                 err = close_all_fds(fds, n_fds);
1487                 if (err >= 0)
1488                         err = shift_fds(fds, n_fds);
1489                 if (err >= 0)
1490                         err = flags_fds(fds, n_fds, context->non_blocking);
1491                 if (err < 0) {
1492                         r = EXIT_FDS;
1493                         goto fail_child;
1494                 }
1495
1496                 if (apply_permissions) {
1497
1498                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
1499                                 if (!context->rlimit[i])
1500                                         continue;
1501
1502                                 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1503                                         err = -errno;
1504                                         r = EXIT_LIMITS;
1505                                         goto fail_child;
1506                                 }
1507                         }
1508
1509                         if (context->capability_bounding_set_drop) {
1510                                 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1511                                 if (err < 0) {
1512                                         r = EXIT_CAPABILITIES;
1513                                         goto fail_child;
1514                                 }
1515                         }
1516
1517                         if (context->user) {
1518                                 err = enforce_user(context, uid);
1519                                 if (err < 0) {
1520                                         r = EXIT_USER;
1521                                         goto fail_child;
1522                                 }
1523                         }
1524
1525                         /* PR_GET_SECUREBITS is not privileged, while
1526                          * PR_SET_SECUREBITS is. So to suppress
1527                          * potential EPERMs we'll try not to call
1528                          * PR_SET_SECUREBITS unless necessary. */
1529                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1530                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1531                                         err = -errno;
1532                                         r = EXIT_SECUREBITS;
1533                                         goto fail_child;
1534                                 }
1535
1536                         if (context->capabilities)
1537                                 if (cap_set_proc(context->capabilities) < 0) {
1538                                         err = -errno;
1539                                         r = EXIT_CAPABILITIES;
1540                                         goto fail_child;
1541                                 }
1542
1543                         if (context->no_new_privileges)
1544                                 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1545                                         err = -errno;
1546                                         r = EXIT_NO_NEW_PRIVILEGES;
1547                                         goto fail_child;
1548                                 }
1549
1550 #ifdef HAVE_SECCOMP
1551                         if (context->syscall_filter) {
1552                                 err = apply_seccomp(context);
1553                                 if (err < 0) {
1554                                         r = EXIT_SECCOMP;
1555                                         goto fail_child;
1556                                 }
1557                         }
1558 #endif
1559
1560 #ifdef HAVE_SELINUX
1561                         if (context->selinux_context && use_selinux()) {
1562                                 bool ignore;
1563                                 char* c;
1564
1565                                 c = context->selinux_context;
1566                                 if (c[0] == '-') {
1567                                         c++;
1568                                         ignore = true;
1569                                 } else
1570                                         ignore = false;
1571
1572                                 err = setexeccon(c);
1573                                 if (err < 0 && !ignore) {
1574                                         r = EXIT_SELINUX_CONTEXT;
1575                                         goto fail_child;
1576                                 }
1577                         }
1578 #endif
1579                 }
1580
1581                 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1582                 if (r < 0) {
1583                         r = EXIT_MEMORY;
1584                         goto fail_child;
1585                 }
1586
1587                 final_env = strv_env_merge(5,
1588                                            environment,
1589                                            our_env,
1590                                            context->environment,
1591                                            files_env,
1592                                            pam_env,
1593                                            NULL);
1594                 if (!final_env) {
1595                         err = -ENOMEM;
1596                         r = EXIT_MEMORY;
1597                         goto fail_child;
1598                 }
1599
1600                 final_argv = replace_env_argv(argv, final_env);
1601                 if (!final_argv) {
1602                         err = -ENOMEM;
1603                         r = EXIT_MEMORY;
1604                         goto fail_child;
1605                 }
1606
1607                 final_env = strv_env_clean(final_env);
1608
1609                 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1610                         line = exec_command_line(final_argv);
1611                         if (line) {
1612                                 log_open();
1613                                 log_struct_unit(LOG_DEBUG,
1614                                                 unit_id,
1615                                                 "EXECUTABLE=%s", command->path,
1616                                                 "MESSAGE=Executing: %s", line,
1617                                                 NULL);
1618                                 log_close();
1619                                 free(line);
1620                                 line = NULL;
1621                         }
1622                 }
1623                 execve(command->path, final_argv, final_env);
1624                 err = -errno;
1625                 r = EXIT_EXEC;
1626
1627         fail_child:
1628                 if (r != 0) {
1629                         log_open();
1630                         log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1631                                    "EXECUTABLE=%s", command->path,
1632                                    "MESSAGE=Failed at step %s spawning %s: %s",
1633                                           exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1634                                           command->path, strerror(-err),
1635                                    "ERRNO=%d", -err,
1636                                    NULL);
1637                         log_close();
1638                 }
1639
1640                 _exit(r);
1641         }
1642
1643         log_struct_unit(LOG_DEBUG,
1644                         unit_id,
1645                         "MESSAGE=Forked %s as "PID_FMT,
1646                         command->path, pid,
1647                         NULL);
1648
1649         /* We add the new process to the cgroup both in the child (so
1650          * that we can be sure that no user code is ever executed
1651          * outside of the cgroup) and in the parent (so that we can be
1652          * sure that when we kill the cgroup the process will be
1653          * killed too). */
1654         if (cgroup_path)
1655                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1656
1657         exec_status_start(&command->exec_status, pid);
1658
1659         *ret = pid;
1660         return 0;
1661 }
1662
1663 void exec_context_init(ExecContext *c) {
1664         assert(c);
1665
1666         c->umask = 0022;
1667         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1668         c->cpu_sched_policy = SCHED_OTHER;
1669         c->syslog_priority = LOG_DAEMON|LOG_INFO;
1670         c->syslog_level_prefix = true;
1671         c->ignore_sigpipe = true;
1672         c->timer_slack_nsec = (nsec_t) -1;
1673 }
1674
1675 void exec_context_done(ExecContext *c) {
1676         unsigned l;
1677
1678         assert(c);
1679
1680         strv_free(c->environment);
1681         c->environment = NULL;
1682
1683         strv_free(c->environment_files);
1684         c->environment_files = NULL;
1685
1686         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1687                 free(c->rlimit[l]);
1688                 c->rlimit[l] = NULL;
1689         }
1690
1691         free(c->working_directory);
1692         c->working_directory = NULL;
1693         free(c->root_directory);
1694         c->root_directory = NULL;
1695
1696         free(c->tty_path);
1697         c->tty_path = NULL;
1698
1699         free(c->tcpwrap_name);
1700         c->tcpwrap_name = NULL;
1701
1702         free(c->syslog_identifier);
1703         c->syslog_identifier = NULL;
1704
1705         free(c->user);
1706         c->user = NULL;
1707
1708         free(c->group);
1709         c->group = NULL;
1710
1711         strv_free(c->supplementary_groups);
1712         c->supplementary_groups = NULL;
1713
1714         free(c->pam_name);
1715         c->pam_name = NULL;
1716
1717         if (c->capabilities) {
1718                 cap_free(c->capabilities);
1719                 c->capabilities = NULL;
1720         }
1721
1722         strv_free(c->read_only_dirs);
1723         c->read_only_dirs = NULL;
1724
1725         strv_free(c->read_write_dirs);
1726         c->read_write_dirs = NULL;
1727
1728         strv_free(c->inaccessible_dirs);
1729         c->inaccessible_dirs = NULL;
1730
1731         if (c->cpuset)
1732                 CPU_FREE(c->cpuset);
1733
1734         free(c->utmp_id);
1735         c->utmp_id = NULL;
1736
1737         free(c->selinux_context);
1738         c->selinux_context = NULL;
1739
1740 #ifdef HAVE_SECCOMP
1741         set_free(c->syscall_filter);
1742         c->syscall_filter = NULL;
1743 #endif
1744 }
1745
1746 void exec_command_done(ExecCommand *c) {
1747         assert(c);
1748
1749         free(c->path);
1750         c->path = NULL;
1751
1752         strv_free(c->argv);
1753         c->argv = NULL;
1754 }
1755
1756 void exec_command_done_array(ExecCommand *c, unsigned n) {
1757         unsigned i;
1758
1759         for (i = 0; i < n; i++)
1760                 exec_command_done(c+i);
1761 }
1762
1763 void exec_command_free_list(ExecCommand *c) {
1764         ExecCommand *i;
1765
1766         while ((i = c)) {
1767                 LIST_REMOVE(command, c, i);
1768                 exec_command_done(i);
1769                 free(i);
1770         }
1771 }
1772
1773 void exec_command_free_array(ExecCommand **c, unsigned n) {
1774         unsigned i;
1775
1776         for (i = 0; i < n; i++) {
1777                 exec_command_free_list(c[i]);
1778                 c[i] = NULL;
1779         }
1780 }
1781
1782 int exec_context_load_environment(const ExecContext *c, char ***l) {
1783         char **i, **r = NULL;
1784
1785         assert(c);
1786         assert(l);
1787
1788         STRV_FOREACH(i, c->environment_files) {
1789                 char *fn;
1790                 int k;
1791                 bool ignore = false;
1792                 char **p;
1793                 _cleanup_globfree_ glob_t pglob = {};
1794                 int count, n;
1795
1796                 fn = *i;
1797
1798                 if (fn[0] == '-') {
1799                         ignore = true;
1800                         fn ++;
1801                 }
1802
1803                 if (!path_is_absolute(fn)) {
1804                         if (ignore)
1805                                 continue;
1806
1807                         strv_free(r);
1808                         return -EINVAL;
1809                 }
1810
1811                 /* Filename supports globbing, take all matching files */
1812                 errno = 0;
1813                 if (glob(fn, 0, NULL, &pglob) != 0) {
1814                         if (ignore)
1815                                 continue;
1816
1817                         strv_free(r);
1818                         return errno ? -errno : -EINVAL;
1819                 }
1820                 count = pglob.gl_pathc;
1821                 if (count == 0) {
1822                         if (ignore)
1823                                 continue;
1824
1825                         strv_free(r);
1826                         return -EINVAL;
1827                 }
1828                 for (n = 0; n < count; n++) {
1829                         k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1830                         if (k < 0) {
1831                                 if (ignore)
1832                                         continue;
1833
1834                                 strv_free(r);
1835                                 return k;
1836                         }
1837                         /* Log invalid environment variables with filename */
1838                         if (p)
1839                                 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1840
1841                         if (r == NULL)
1842                                 r = p;
1843                         else {
1844                                 char **m;
1845
1846                                 m = strv_env_merge(2, r, p);
1847                                 strv_free(r);
1848                                 strv_free(p);
1849                                 if (!m)
1850                                         return -ENOMEM;
1851
1852                                 r = m;
1853                         }
1854                 }
1855         }
1856
1857         *l = r;
1858
1859         return 0;
1860 }
1861
/* Checks whether the given TTY (with or without a leading "/dev/") might be
 * the device behind /dev/console. Returns true as well when the console
 * cannot be resolved, i.e. a match is assumed when in doubt. */
static bool tty_may_match_dev_console(const char *tty) {
        char *active = NULL, *console;
        bool match;

        if (startswith(tty, "/dev/"))
                tty += 5;

        /* The console trivially matches itself */
        if (streq(tty, "console"))
                return true;

        console = resolve_dev_console(&active);
        if (!console)
                /* Resolution failed, so we cannot rule a match out */
                return true;

        match = streq(console, tty);

        /* "tty0" means the active VC, so any VC may be the same sometimes */
        if (!match && streq(console, "tty0"))
                match = tty_is_vc(tty);

        free(active);

        return match;
}
1884
1885 bool exec_context_may_touch_console(ExecContext *ec) {
1886         return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1887                 is_terminal_input(ec->std_input) ||
1888                 is_terminal_output(ec->std_output) ||
1889                 is_terminal_output(ec->std_error)) &&
1890                tty_may_match_dev_console(tty_path(ec));
1891 }
1892
1893 static void strv_fprintf(FILE *f, char **l) {
1894         char **g;
1895
1896         assert(f);
1897
1898         STRV_FOREACH(g, l)
1899                 fprintf(f, " %s", *g);
1900 }
1901
1902 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1903         char **e;
1904         unsigned i;
1905
1906         assert(c);
1907         assert(f);
1908
1909         prefix = strempty(prefix);
1910
1911         fprintf(f,
1912                 "%sUMask: %04o\n"
1913                 "%sWorkingDirectory: %s\n"
1914                 "%sRootDirectory: %s\n"
1915                 "%sNonBlocking: %s\n"
1916                 "%sPrivateTmp: %s\n"
1917                 "%sPrivateNetwork: %s\n"
1918                 "%sPrivateDevices: %s\n"
1919                 "%sIgnoreSIGPIPE: %s\n",
1920                 prefix, c->umask,
1921                 prefix, c->working_directory ? c->working_directory : "/",
1922                 prefix, c->root_directory ? c->root_directory : "/",
1923                 prefix, yes_no(c->non_blocking),
1924                 prefix, yes_no(c->private_tmp),
1925                 prefix, yes_no(c->private_network),
1926                 prefix, yes_no(c->private_devices),
1927                 prefix, yes_no(c->ignore_sigpipe));
1928
1929         STRV_FOREACH(e, c->environment)
1930                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1931
1932         STRV_FOREACH(e, c->environment_files)
1933                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1934
1935         if (c->tcpwrap_name)
1936                 fprintf(f,
1937                         "%sTCPWrapName: %s\n",
1938                         prefix, c->tcpwrap_name);
1939
1940         if (c->nice_set)
1941                 fprintf(f,
1942                         "%sNice: %i\n",
1943                         prefix, c->nice);
1944
1945         if (c->oom_score_adjust_set)
1946                 fprintf(f,
1947                         "%sOOMScoreAdjust: %i\n",
1948                         prefix, c->oom_score_adjust);
1949
1950         for (i = 0; i < RLIM_NLIMITS; i++)
1951                 if (c->rlimit[i])
1952                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1953
1954         if (c->ioprio_set) {
1955                 char *class_str;
1956                 int r;
1957
1958                 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1959                 if (r < 0)
1960                         class_str = NULL;
1961                 fprintf(f,
1962                         "%sIOSchedulingClass: %s\n"
1963                         "%sIOPriority: %i\n",
1964                         prefix, strna(class_str),
1965                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1966                 free(class_str);
1967         }
1968
1969         if (c->cpu_sched_set) {
1970                 char *policy_str;
1971                 int r;
1972
1973                 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1974                 if (r < 0)
1975                         policy_str = NULL;
1976                 fprintf(f,
1977                         "%sCPUSchedulingPolicy: %s\n"
1978                         "%sCPUSchedulingPriority: %i\n"
1979                         "%sCPUSchedulingResetOnFork: %s\n",
1980                         prefix, strna(policy_str),
1981                         prefix, c->cpu_sched_priority,
1982                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1983                 free(policy_str);
1984         }
1985
1986         if (c->cpuset) {
1987                 fprintf(f, "%sCPUAffinity:", prefix);
1988                 for (i = 0; i < c->cpuset_ncpus; i++)
1989                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1990                                 fprintf(f, " %u", i);
1991                 fputs("\n", f);
1992         }
1993
1994         if (c->timer_slack_nsec != (nsec_t) -1)
1995                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
1996
1997         fprintf(f,
1998                 "%sStandardInput: %s\n"
1999                 "%sStandardOutput: %s\n"
2000                 "%sStandardError: %s\n",
2001                 prefix, exec_input_to_string(c->std_input),
2002                 prefix, exec_output_to_string(c->std_output),
2003                 prefix, exec_output_to_string(c->std_error));
2004
2005         if (c->tty_path)
2006                 fprintf(f,
2007                         "%sTTYPath: %s\n"
2008                         "%sTTYReset: %s\n"
2009                         "%sTTYVHangup: %s\n"
2010                         "%sTTYVTDisallocate: %s\n",
2011                         prefix, c->tty_path,
2012                         prefix, yes_no(c->tty_reset),
2013                         prefix, yes_no(c->tty_vhangup),
2014                         prefix, yes_no(c->tty_vt_disallocate));
2015
2016         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2017             c->std_output == EXEC_OUTPUT_KMSG ||
2018             c->std_output == EXEC_OUTPUT_JOURNAL ||
2019             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2020             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2021             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2022             c->std_error == EXEC_OUTPUT_SYSLOG ||
2023             c->std_error == EXEC_OUTPUT_KMSG ||
2024             c->std_error == EXEC_OUTPUT_JOURNAL ||
2025             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2026             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2027             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2028
2029                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2030
2031                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2032                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2033
2034                 fprintf(f,
2035                         "%sSyslogFacility: %s\n"
2036                         "%sSyslogLevel: %s\n",
2037                         prefix, strna(fac_str),
2038                         prefix, strna(lvl_str));
2039         }
2040
2041         if (c->capabilities) {
2042                 _cleanup_cap_free_charp_ char *t;
2043
2044                 t = cap_to_text(c->capabilities, NULL);
2045                 if (t)
2046                         fprintf(f, "%sCapabilities: %s\n", prefix, t);
2047         }
2048
2049         if (c->secure_bits)
2050                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2051                         prefix,
2052                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2053                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2054                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2055                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2056                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2057                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2058
2059         if (c->capability_bounding_set_drop) {
2060                 unsigned long l;
2061                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2062
2063                 for (l = 0; l <= cap_last_cap(); l++)
2064                         if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2065                                 _cleanup_cap_free_charp_ char *t;
2066
2067                                 t = cap_to_name(l);
2068                                 if (t)
2069                                         fprintf(f, " %s", t);
2070                         }
2071
2072                 fputs("\n", f);
2073         }
2074
2075         if (c->user)
2076                 fprintf(f, "%sUser: %s\n", prefix, c->user);
2077         if (c->group)
2078                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2079
2080         if (strv_length(c->supplementary_groups) > 0) {
2081                 fprintf(f, "%sSupplementaryGroups:", prefix);
2082                 strv_fprintf(f, c->supplementary_groups);
2083                 fputs("\n", f);
2084         }
2085
2086         if (c->pam_name)
2087                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2088
2089         if (strv_length(c->read_write_dirs) > 0) {
2090                 fprintf(f, "%sReadWriteDirs:", prefix);
2091                 strv_fprintf(f, c->read_write_dirs);
2092                 fputs("\n", f);
2093         }
2094
2095         if (strv_length(c->read_only_dirs) > 0) {
2096                 fprintf(f, "%sReadOnlyDirs:", prefix);
2097                 strv_fprintf(f, c->read_only_dirs);
2098                 fputs("\n", f);
2099         }
2100
2101         if (strv_length(c->inaccessible_dirs) > 0) {
2102                 fprintf(f, "%sInaccessibleDirs:", prefix);
2103                 strv_fprintf(f, c->inaccessible_dirs);
2104                 fputs("\n", f);
2105         }
2106
2107         if (c->utmp_id)
2108                 fprintf(f,
2109                         "%sUtmpIdentifier: %s\n",
2110                         prefix, c->utmp_id);
2111
2112         if (c->selinux_context)
2113                 fprintf(f,
2114                         "%sSELinuxContext: %s\n",
2115                         prefix, c->selinux_context);
2116
2117         if (c->syscall_filter) {
2118 #ifdef HAVE_SECCOMP
2119                 Iterator j;
2120                 void *id;
2121                 bool first = true;
2122 #endif
2123
2124                 fprintf(f,
2125                         "%sSystemCallFilter: \n",
2126                         prefix);
2127
2128                 if (!c->syscall_whitelist)
2129                         fputc('~', f);
2130
2131 #ifdef HAVE_SECCOMP
2132                 SET_FOREACH(id, c->syscall_filter, j) {
2133                         _cleanup_free_ char *name = NULL;
2134
2135                         if (first)
2136                                 first = false;
2137                         else
2138                                 fputc(' ', f);
2139
2140                         name = seccomp_syscall_resolve_num_arch(PTR_TO_INT(id)-1, SCMP_ARCH_NATIVE);
2141                         fputs(strna(name), f);
2142                 }
2143 #endif
2144
2145                 fputc('\n', f);
2146         }
2147
2148         if (c->syscall_errno != 0)
2149                 fprintf(f,
2150                         "%sSystemCallErrorNumber: %s\n",
2151                         prefix, strna(errno_to_name(c->syscall_errno)));
2152 }
2153
2154 void exec_status_start(ExecStatus *s, pid_t pid) {
2155         assert(s);
2156
2157         zero(*s);
2158         s->pid = pid;
2159         dual_timestamp_get(&s->start_timestamp);
2160 }
2161
2162 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2163         assert(s);
2164
2165         if (s->pid && s->pid != pid)
2166                 zero(*s);
2167
2168         s->pid = pid;
2169         dual_timestamp_get(&s->exit_timestamp);
2170
2171         s->code = code;
2172         s->status = status;
2173
2174         if (context) {
2175                 if (context->utmp_id)
2176                         utmp_put_dead_process(context->utmp_id, pid, code, status);
2177
2178                 exec_context_tty_reset(context);
2179         }
2180 }
2181
2182 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2183         char buf[FORMAT_TIMESTAMP_MAX];
2184
2185         assert(s);
2186         assert(f);
2187
2188         if (!prefix)
2189                 prefix = "";
2190
2191         if (s->pid <= 0)
2192                 return;
2193
2194         fprintf(f,
2195                 "%sPID: "PID_FMT"\n",
2196                 prefix, s->pid);
2197
2198         if (s->start_timestamp.realtime > 0)
2199                 fprintf(f,
2200                         "%sStart Timestamp: %s\n",
2201                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2202
2203         if (s->exit_timestamp.realtime > 0)
2204                 fprintf(f,
2205                         "%sExit Timestamp: %s\n"
2206                         "%sExit Code: %s\n"
2207                         "%sExit Status: %i\n",
2208                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2209                         prefix, sigchld_code_to_string(s->code),
2210                         prefix, s->status);
2211 }
2212
2213 char *exec_command_line(char **argv) {
2214         size_t k;
2215         char *n, *p, **a;
2216         bool first = true;
2217
2218         assert(argv);
2219
2220         k = 1;
2221         STRV_FOREACH(a, argv)
2222                 k += strlen(*a)+3;
2223
2224         if (!(n = new(char, k)))
2225                 return NULL;
2226
2227         p = n;
2228         STRV_FOREACH(a, argv) {
2229
2230                 if (!first)
2231                         *(p++) = ' ';
2232                 else
2233                         first = false;
2234
2235                 if (strpbrk(*a, WHITESPACE)) {
2236                         *(p++) = '\'';
2237                         p = stpcpy(p, *a);
2238                         *(p++) = '\'';
2239                 } else
2240                         p = stpcpy(p, *a);
2241
2242         }
2243
2244         *p = 0;
2245
2246         /* FIXME: this doesn't really handle arguments that have
2247          * spaces and ticks in them */
2248
2249         return n;
2250 }
2251
2252 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2253         char *p2;
2254         const char *prefix2;
2255
2256         char *cmd;
2257
2258         assert(c);
2259         assert(f);
2260
2261         if (!prefix)
2262                 prefix = "";
2263         p2 = strappend(prefix, "\t");
2264         prefix2 = p2 ? p2 : prefix;
2265
2266         cmd = exec_command_line(c->argv);
2267
2268         fprintf(f,
2269                 "%sCommand Line: %s\n",
2270                 prefix, cmd ? cmd : strerror(ENOMEM));
2271
2272         free(cmd);
2273
2274         exec_status_dump(&c->exec_status, f, prefix2);
2275
2276         free(p2);
2277 }
2278
2279 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2280         assert(f);
2281
2282         if (!prefix)
2283                 prefix = "";
2284
2285         LIST_FOREACH(command, c, c)
2286                 exec_command_dump(c, f, prefix);
2287 }
2288
2289 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2290         ExecCommand *end;
2291
2292         assert(l);
2293         assert(e);
2294
2295         if (*l) {
2296                 /* It's kind of important, that we keep the order here */
2297                 LIST_FIND_TAIL(command, *l, end);
2298                 LIST_INSERT_AFTER(command, *l, end, e);
2299         } else
2300               *l = e;
2301 }
2302
2303 int exec_command_set(ExecCommand *c, const char *path, ...) {
2304         va_list ap;
2305         char **l, *p;
2306
2307         assert(c);
2308         assert(path);
2309
2310         va_start(ap, path);
2311         l = strv_new_ap(path, ap);
2312         va_end(ap);
2313
2314         if (!l)
2315                 return -ENOMEM;
2316
2317         p = strdup(path);
2318         if (!p) {
2319                 strv_free(l);
2320                 return -ENOMEM;
2321         }
2322
2323         free(c->path);
2324         c->path = p;
2325
2326         strv_free(c->argv);
2327         c->argv = l;
2328
2329         return 0;
2330 }
2331
2332 static int exec_runtime_allocate(ExecRuntime **rt) {
2333
2334         if (*rt)
2335                 return 0;
2336
2337         *rt = new0(ExecRuntime, 1);
2338         if (!*rt)
2339                 return -ENOMEM;
2340
2341         (*rt)->n_ref = 1;
2342         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2343
2344         return 0;
2345 }
2346
2347 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2348         int r;
2349
2350         assert(rt);
2351         assert(c);
2352         assert(id);
2353
2354         if (*rt)
2355                 return 1;
2356
2357         if (!c->private_network && !c->private_tmp)
2358                 return 0;
2359
2360         r = exec_runtime_allocate(rt);
2361         if (r < 0)
2362                 return r;
2363
2364         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2365                 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2366                         return -errno;
2367         }
2368
2369         if (c->private_tmp && !(*rt)->tmp_dir) {
2370                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2371                 if (r < 0)
2372                         return r;
2373         }
2374
2375         return 1;
2376 }
2377
2378 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2379         assert(r);
2380         assert(r->n_ref > 0);
2381
2382         r->n_ref++;
2383         return r;
2384 }
2385
2386 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2387
2388         if (!r)
2389                 return NULL;
2390
2391         assert(r->n_ref > 0);
2392
2393         r->n_ref--;
2394         if (r->n_ref <= 0) {
2395                 free(r->tmp_dir);
2396                 free(r->var_tmp_dir);
2397                 close_pipe(r->netns_storage_socket);
2398                 free(r);
2399         }
2400
2401         return NULL;
2402 }
2403
2404 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2405         assert(u);
2406         assert(f);
2407         assert(fds);
2408
2409         if (!rt)
2410                 return 0;
2411
2412         if (rt->tmp_dir)
2413                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2414
2415         if (rt->var_tmp_dir)
2416                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2417
2418         if (rt->netns_storage_socket[0] >= 0) {
2419                 int copy;
2420
2421                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2422                 if (copy < 0)
2423                         return copy;
2424
2425                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2426         }
2427
2428         if (rt->netns_storage_socket[1] >= 0) {
2429                 int copy;
2430
2431                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2432                 if (copy < 0)
2433                         return copy;
2434
2435                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2436         }
2437
2438         return 0;
2439 }
2440
2441 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2442         int r;
2443
2444         assert(rt);
2445         assert(key);
2446         assert(value);
2447
2448         if (streq(key, "tmp-dir")) {
2449                 char *copy;
2450
2451                 r = exec_runtime_allocate(rt);
2452                 if (r < 0)
2453                         return r;
2454
2455                 copy = strdup(value);
2456                 if (!copy)
2457                         return log_oom();
2458
2459                 free((*rt)->tmp_dir);
2460                 (*rt)->tmp_dir = copy;
2461
2462         } else if (streq(key, "var-tmp-dir")) {
2463                 char *copy;
2464
2465                 r = exec_runtime_allocate(rt);
2466                 if (r < 0)
2467                         return r;
2468
2469                 copy = strdup(value);
2470                 if (!copy)
2471                         return log_oom();
2472
2473                 free((*rt)->var_tmp_dir);
2474                 (*rt)->var_tmp_dir = copy;
2475
2476         } else if (streq(key, "netns-socket-0")) {
2477                 int fd;
2478
2479                 r = exec_runtime_allocate(rt);
2480                 if (r < 0)
2481                         return r;
2482
2483                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2484                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2485                 else {
2486                         if ((*rt)->netns_storage_socket[0] >= 0)
2487                                 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2488
2489                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2490                 }
2491         } else if (streq(key, "netns-socket-1")) {
2492                 int fd;
2493
2494                 r = exec_runtime_allocate(rt);
2495                 if (r < 0)
2496                         return r;
2497
2498                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2499                         log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2500                 else {
2501                         if ((*rt)->netns_storage_socket[1] >= 0)
2502                                 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2503
2504                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2505                 }
2506         } else
2507                 return 0;
2508
2509         return 1;
2510 }
2511
/* Job body handed to asynchronous_job(): removes the directory tree named by
 * the string p via rm_rf_dangerous() and then frees the string. Always
 * returns NULL. */
static void *remove_tmpdir_thread(void *p) {
        char *path = p;

        rm_rf_dangerous(path, false, true, false);
        free(path);

        return NULL;
}
2518
2519 void exec_runtime_destroy(ExecRuntime *rt) {
2520         if (!rt)
2521                 return;
2522
2523         /* If there are multiple users of this, let's leave the stuff around */
2524         if (rt->n_ref > 1)
2525                 return;
2526
2527         if (rt->tmp_dir) {
2528                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2529                 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2530                 rt->tmp_dir = NULL;
2531         }
2532
2533         if (rt->var_tmp_dir) {
2534                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2535                 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2536                 rt->var_tmp_dir = NULL;
2537         }
2538
2539         close_pipe(rt->netns_storage_socket);
2540 }
2541
2542 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2543         [EXEC_INPUT_NULL] = "null",
2544         [EXEC_INPUT_TTY] = "tty",
2545         [EXEC_INPUT_TTY_FORCE] = "tty-force",
2546         [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2547         [EXEC_INPUT_SOCKET] = "socket"
2548 };
2549
2550 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2551
2552 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2553         [EXEC_OUTPUT_INHERIT] = "inherit",
2554         [EXEC_OUTPUT_NULL] = "null",
2555         [EXEC_OUTPUT_TTY] = "tty",
2556         [EXEC_OUTPUT_SYSLOG] = "syslog",
2557         [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2558         [EXEC_OUTPUT_KMSG] = "kmsg",
2559         [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2560         [EXEC_OUTPUT_JOURNAL] = "journal",
2561         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2562         [EXEC_OUTPUT_SOCKET] = "socket"
2563 };
2564
2565 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);