chiark / gitweb /
units: install a few basic units by default
[elogind.git] / execute.c
1 /*-*- Mode: C; c-basic-offset: 8 -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <dirent.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <signal.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <grp.h>
36 #include <pwd.h>
37
38 #include "execute.h"
39 #include "strv.h"
40 #include "macro.h"
41 #include "util.h"
42 #include "log.h"
43 #include "ioprio.h"
44 #include "securebits.h"
45 #include "cgroup.h"
46
/* Relocates the passed file descriptors so that entry i of the array
 * ends up at descriptor number i+3. The fds array is rewritten in
 * place with the new descriptor numbers.
 *
 * Returns 0 on success or a negative errno-style value if duplicating
 * a descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1;
                int idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3;
                        int dup_fd;

                        /* This one already sits in its final slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        assert_se(close_nointr(fds[idx]) == 0);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free descriptor
                         * that is >= wanted. If the wanted slot was
                         * still occupied, remember the first such
                         * index so that another pass starts there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                /* A pass without misses means everything is in place */
                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
90
/* Applies the requested O_NONBLOCK state to each of the passed file
 * descriptors and clears FD_CLOEXEC on them.
 *
 * Returns 0 on success, or the negative value returned by the first
 * helper call that fails. */
static int flags_fds(int fds[], unsigned n_fds, bool nonblock) {
        unsigned idx = 0;

        if (n_fds <= 0)
                return 0;

        assert(fds);

        while (idx < n_fds) {
                int fd = fds[idx++];
                int k;

                k = fd_nonblock(fd, nonblock);
                if (k < 0)
                        return k;

                /* FD_CLOEXEC is dropped unconditionally: these fds are
                 * meant to be handed over to the child we execute. */
                k = fd_cloexec(fd, false);
                if (k < 0)
                        return k;
        }

        return 0;
}
117
118 static const char *tty_path(const ExecContext *context) {
119         assert(context);
120
121         if (context->tty_path)
122                 return context->tty_path;
123
124         return "/dev/console";
125 }
126
/* Opens /dev/null with the given open flags (plus O_NOCTTY) and makes
 * it available as descriptor nfd.
 *
 * Returns nfd on success, a negative errno-style value on failure. */
static int open_null_as(int flags, int nfd) {
        int null_fd, result;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);
        if (null_fd < 0)
                return -errno;

        /* Lucky case: the kernel handed us the wanted number directly */
        if (null_fd == nfd)
                return nfd;

        /* Otherwise duplicate onto the target and drop the temporary
         * descriptor. The result is computed before closing so that
         * errno from dup2() is what gets reported. */
        result = dup2(null_fd, nfd) < 0 ? -errno : nfd;
        close_nointr(null_fd);

        return result;
}
143
144 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
145         int fd, r;
146         union {
147                 struct sockaddr sa;
148                 struct sockaddr_un un;
149         } sa;
150
151         assert(context);
152         assert(output < _EXEC_OUTPUT_MAX);
153         assert(ident);
154         assert(nfd >= 0);
155
156         if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
157                 return -errno;
158
159         zero(sa);
160         sa.sa.sa_family = AF_UNIX;
161         strncpy(sa.un.sun_path+1, LOGGER_SOCKET, sizeof(sa.un.sun_path)-1);
162
163         if (connect(fd, &sa.sa, sizeof(sa)) < 0) {
164                 close_nointr_nofail(fd);
165                 return -errno;
166         }
167
168         if (shutdown(fd, SHUT_RD) < 0) {
169                 close_nointr_nofail(fd);
170                 return -errno;
171         }
172
173         /* We speak a very simple protocol between log server
174          * and client: one line for the log destination (kmsg
175          * or syslog), followed by the priority field,
176          * followed by the process name. Since we replaced
177          * stdin/stderr we simple use stdio to write to
178          * it. Note that we use stderr, to minimize buffer
179          * flushing issues. */
180
181         dprintf(fd,
182                 "%s\n"
183                 "%i\n"
184                 "%s\n",
185                 output == EXEC_OUTPUT_KERNEL ? "kmsg" : "syslog",
186                 context->syslog_priority,
187                 context->syslog_identifier ? context->syslog_identifier : ident);
188
189         if (fd != nfd) {
190                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
191                 close_nointr(fd);
192         } else
193                 r = nfd;
194
195         return r;
196 }
/* Opens the terminal at path via open_terminal() (adding O_NOCTTY to
 * the passed flags) and makes it available as descriptor nfd.
 *
 * Returns nfd on success. On failure returns the negative value from
 * open_terminal(), or -errno if dup2() fails. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int tty_fd, result;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, mode | O_NOCTTY);
        if (tty_fd < 0)
                return tty_fd;

        /* Already on the right descriptor, nothing to move */
        if (tty_fd == nfd)
                return nfd;

        result = dup2(tty_fd, nfd) < 0 ? -errno : nfd;
        close_nointr_nofail(tty_fd);

        return result;
}
214
215 static bool is_terminal_input(ExecInput i) {
216         return
217                 i == EXEC_INPUT_TTY ||
218                 i == EXEC_INPUT_TTY_FORCE ||
219                 i == EXEC_INPUT_TTY_FAIL;
220 }
221
222 static int setup_input(const ExecContext *context) {
223         assert(context);
224
225         switch (context->std_input) {
226
227         case EXEC_INPUT_NULL:
228                 return open_null_as(O_RDONLY, STDIN_FILENO);
229
230         case EXEC_INPUT_TTY:
231         case EXEC_INPUT_TTY_FORCE:
232         case EXEC_INPUT_TTY_FAIL: {
233                 int fd, r;
234
235                 if ((fd = acquire_terminal(
236                                      tty_path(context),
237                                      context->std_input == EXEC_INPUT_TTY_FAIL,
238                                      context->std_input == EXEC_INPUT_TTY_FORCE)) < 0)
239                         return fd;
240
241                 if (fd != STDIN_FILENO) {
242                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
243                         close_nointr_nofail(fd);
244                 } else
245                         r = STDIN_FILENO;
246
247                 return r;
248         }
249
250         default:
251                 assert_not_reached("Unknown input type");
252         }
253 }
254
255 static int setup_output(const ExecContext *context, const char *ident) {
256         assert(context);
257         assert(ident);
258
259         /* This expects the input is already set up */
260
261         switch (context->std_output) {
262
263         case EXEC_OUTPUT_INHERIT:
264
265                 /* If the input is connected to a terminal, inherit that... */
266                 if (is_terminal_input(context->std_input))
267                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
268
269                 return 0;
270
271         case EXEC_OUTPUT_NULL:
272                 return open_null_as(O_WRONLY, STDOUT_FILENO);
273
274         case EXEC_OUTPUT_TTY: {
275                 if (is_terminal_input(context->std_input))
276                         return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
277
278                 /* We don't reset the terminal if this is just about output */
279                 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
280         }
281
282         case EXEC_OUTPUT_SYSLOG:
283         case EXEC_OUTPUT_KERNEL:
284                 return connect_logger_as(context, context->std_output, ident, STDOUT_FILENO);
285
286         default:
287                 assert_not_reached("Unknown output type");
288         }
289 }
290
291 static int setup_error(const ExecContext *context, const char *ident) {
292         assert(context);
293
294         /* This expects the input and output are already set up */
295
296         /* Don't change the stderr file descriptor if we inherit all
297          * the way and are not on a tty */
298         if (context->std_error == EXEC_OUTPUT_INHERIT &&
299             context->std_output == EXEC_OUTPUT_INHERIT &&
300             !is_terminal_input(context->std_input))
301                 return STDERR_FILENO;
302
303         /* Duplicate form stdout if possible */
304         if (context->std_error == context->std_output ||
305             context->std_error == EXEC_OUTPUT_INHERIT)
306                 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
307
308         switch (context->std_error) {
309
310         case EXEC_OUTPUT_NULL:
311                 return open_null_as(O_WRONLY, STDERR_FILENO);
312
313         case EXEC_OUTPUT_TTY:
314                 if (is_terminal_input(context->std_input))
315                         return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
316
317                 /* We don't reset the terminal if this is just about output */
318                 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
319
320         case EXEC_OUTPUT_SYSLOG:
321         case EXEC_OUTPUT_KERNEL:
322                 return connect_logger_as(context, context->std_error, ident, STDERR_FILENO);
323
324         default:
325                 assert_not_reached("Unknown error type");
326         }
327 }
328
329 static int setup_confirm_stdio(const ExecContext *context,
330                                int *_saved_stdin,
331                                int *_saved_stdout) {
332         int fd = -1, saved_stdin, saved_stdout = -1, r;
333
334         assert(context);
335         assert(_saved_stdin);
336         assert(_saved_stdout);
337
338         /* This returns positive EXIT_xxx return values instead of
339          * negative errno style values! */
340
341         if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
342                 return EXIT_STDIN;
343
344         if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
345                 r = EXIT_STDOUT;
346                 goto fail;
347         }
348
349         if ((fd = acquire_terminal(
350                              tty_path(context),
351                              context->std_input == EXEC_INPUT_TTY_FAIL,
352                              context->std_input == EXEC_INPUT_TTY_FORCE)) < 0) {
353                 r = EXIT_STDIN;
354                 goto fail;
355         }
356
357         if (dup2(fd, STDIN_FILENO) < 0) {
358                 r = EXIT_STDIN;
359                 goto fail;
360         }
361
362         if (dup2(fd, STDOUT_FILENO) < 0) {
363                 r = EXIT_STDOUT;
364                 goto fail;
365         }
366
367         if (fd >= 2)
368                 close_nointr_nofail(fd);
369
370         *_saved_stdin = saved_stdin;
371         *_saved_stdout = saved_stdout;
372
373         return 0;
374
375 fail:
376         if (saved_stdout >= 0)
377                 close_nointr_nofail(saved_stdout);
378
379         if (saved_stdin >= 0)
380                 close_nointr_nofail(saved_stdin);
381
382         if (fd >= 0)
383                 close_nointr_nofail(fd);
384
385         return r;
386 }
387
388 static int restore_conform_stdio(const ExecContext *context,
389                                  int *saved_stdin,
390                                  int *saved_stdout,
391                                  bool *keep_stdin,
392                                  bool *keep_stdout) {
393
394         assert(context);
395         assert(saved_stdin);
396         assert(*saved_stdin >= 0);
397         assert(saved_stdout);
398         assert(*saved_stdout >= 0);
399
400         /* This returns positive EXIT_xxx return values instead of
401          * negative errno style values! */
402
403         if (is_terminal_input(context->std_input)) {
404
405                 /* The service wants terminal input. */
406
407                 *keep_stdin = true;
408                 *keep_stdout =
409                         context->std_output == EXEC_OUTPUT_INHERIT ||
410                         context->std_output == EXEC_OUTPUT_TTY;
411
412         } else {
413                 /* If the service doesn't want a controlling terminal,
414                  * then we need to get rid entirely of what we have
415                  * already. */
416
417                 if (release_terminal() < 0)
418                         return EXIT_STDIN;
419
420                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
421                         return EXIT_STDIN;
422
423                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
424                         return EXIT_STDOUT;
425
426                 *keep_stdout = *keep_stdin = false;
427         }
428
429         return 0;
430 }
431
/* Resolves a group, given either by name or as a decimal GID, to its
 * numeric GID.
 *
 * Returns 0 and stores the GID in *gid on success. On failure returns
 * -errno if the lookup set errno, or -ESRCH if no entry was found. */
static int get_group_creds(const char *groupname, gid_t *gid) {
        struct group *entry;
        unsigned long number;
        bool numeric;

        assert(groupname);
        assert(gid);

        /* gid=0 gets special treatment: its data is hardcoded so that
         * no NSS lookup is needed for root. */
        if (streq(groupname, "root") || streq(groupname, "0")) {
                *gid = 0;
                return 0;
        }

        numeric = safe_atolu(groupname, &number) >= 0;

        /* Reset errno right before the lookup, so that "not found" can
         * be told apart from a real error afterwards */
        errno = 0;
        entry = numeric ? getgrgid((gid_t) number) : getgrnam(groupname);

        if (!entry)
                return errno != 0 ? -errno : -ESRCH;

        *gid = entry->gr_gid;
        return 0;
}
461
/* Resolves a user, given either by name or as a decimal UID, to its
 * UID, primary GID and home directory.
 *
 * username is an in/out parameter: if the user was specified
 * numerically (or as root), *username is replaced by the actual user
 * name. The strings handed back via *username and *home are either
 * string literals or point into the passwd entry returned by
 * getpwuid()/getpwnam(); they are not copied.
 *
 * Returns 0 on success. On failure returns -errno if the lookup set
 * errno, or -ESRCH if no entry was found. */
static int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home) {
        struct passwd *entry;
        unsigned long number;
        bool numeric;

        assert(username);
        assert(*username);
        assert(uid);
        assert(gid);
        assert(home);

        /* uid=0 gets special treatment: its data is hardcoded so that
         * no NSS lookup is needed for root. */
        if (streq(*username, "root") || streq(*username, "0")) {
                *username = "root";
                *uid = 0;
                *gid = 0;
                *home = "/root";
                return 0;
        }

        numeric = safe_atolu(*username, &number) >= 0;

        /* Reset errno right before the lookup, so that "not found" can
         * be told apart from a real error afterwards */
        errno = 0;

        if (numeric) {
                entry = getpwuid((uid_t) number);

                /* If several users share the same id, a user
                 * configured by name keeps that name for $USER instead
                 * of the first occurrence in the database. Only if the
                 * user was configured numerically we pick the real
                 * name from the user database. */
                if (entry)
                        *username = entry->pw_name;
        } else
                entry = getpwnam(*username);

        if (!entry)
                return errno != 0 ? -errno : -ESRCH;

        *uid = entry->pw_uid;
        *gid = entry->pw_gid;
        *home = entry->pw_dir;
        return 0;
}
507
508 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
509         bool keep_groups = false;
510         int r;
511
512         assert(context);
513
514         /* Lookup and ser GID and supplementary group list. Here too
515          * we avoid NSS lookups for gid=0. */
516
517         if (context->group || username) {
518
519                 if (context->group)
520                         if ((r = get_group_creds(context->group, &gid)) < 0)
521                                 return r;
522
523                 /* First step, initialize groups from /etc/groups */
524                 if (username && gid != 0) {
525                         if (initgroups(username, gid) < 0)
526                                 return -errno;
527
528                         keep_groups = true;
529                 }
530
531                 /* Second step, set our gids */
532                 if (setresgid(gid, gid, gid) < 0)
533                         return -errno;
534         }
535
536         if (context->supplementary_groups) {
537                 int ngroups_max, k;
538                 gid_t *gids;
539                 char **i;
540
541                 /* Final step, initialize any manually set supplementary groups */
542                 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
543
544                 if (!(gids = new(gid_t, ngroups_max)))
545                         return -ENOMEM;
546
547                 if (keep_groups) {
548                         if ((k = getgroups(ngroups_max, gids)) < 0) {
549                                 free(gids);
550                                 return -errno;
551                         }
552                 } else
553                         k = 0;
554
555                 STRV_FOREACH(i, context->supplementary_groups) {
556
557                         if (k >= ngroups_max) {
558                                 free(gids);
559                                 return -E2BIG;
560                         }
561
562                         if ((r = get_group_creds(*i, gids+k)) < 0) {
563                                 free(gids);
564                                 return r;
565                         }
566
567                         k++;
568                 }
569
570                 if (setgroups(k, gids) < 0) {
571                         free(gids);
572                         return -errno;
573                 }
574
575                 free(gids);
576         }
577
578         return 0;
579 }
580
581 static int enforce_user(const ExecContext *context, uid_t uid) {
582         int r;
583         assert(context);
584
585         /* Sets (but doesn't lookup) the uid and make sure we keep the
586          * capabilities while doing so. */
587
588         if (context->capabilities) {
589                 cap_t d;
590                 static const cap_value_t bits[] = {
591                         CAP_SETUID,   /* Necessary so that we can run setresuid() below */
592                         CAP_SETPCAP   /* Necessary so that we can set PR_SET_SECUREBITS later on */
593                 };
594
595                 /* First step: If we need to keep capabilities but
596                  * drop privileges we need to make sure we keep our
597                  * caps, whiel we drop priviliges. */
598                 if (uid != 0) {
599                         int sb = context->secure_bits|SECURE_KEEP_CAPS;
600
601                         if (prctl(PR_GET_SECUREBITS) != sb)
602                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
603                                         return -errno;
604                 }
605
606                 /* Second step: set the capabilites. This will reduce
607                  * the capabilities to the minimum we need. */
608
609                 if (!(d = cap_dup(context->capabilities)))
610                         return -errno;
611
612                 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
613                     cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
614                         r = -errno;
615                         cap_free(d);
616                         return r;
617                 }
618
619                 if (cap_set_proc(d) < 0) {
620                         r = -errno;
621                         cap_free(d);
622                         return r;
623                 }
624
625                 cap_free(d);
626         }
627
628         /* Third step: actually set the uids */
629         if (setresuid(uid, uid, uid) < 0)
630                 return -errno;
631
632         /* At this point we should have all necessary capabilities but
633            are otherwise a normal user. However, the caps might got
634            corrupted due to the setresuid() so we need clean them up
635            later. This is done outside of this call. */
636
637         return 0;
638 }
639
640 int exec_spawn(ExecCommand *command,
641                const ExecContext *context,
642                int *fds, unsigned n_fds,
643                bool apply_permissions,
644                bool apply_chroot,
645                bool confirm_spawn,
646                CGroupBonding *cgroup_bondings,
647                pid_t *ret) {
648
649         pid_t pid;
650         int r;
651         char *line;
652
653         assert(command);
654         assert(context);
655         assert(ret);
656         assert(fds || n_fds <= 0);
657
658         if (!(line = exec_command_line(command)))
659                 return -ENOMEM;
660
661         log_debug("About to execute: %s", line);
662         free(line);
663
664         if (cgroup_bondings)
665                 if ((r = cgroup_bonding_realize_list(cgroup_bondings)))
666                         return r;
667
668         if ((pid = fork()) < 0)
669                 return -errno;
670
671         if (pid == 0) {
672                 int i;
673                 sigset_t ss;
674                 const char *username = NULL, *home = NULL;
675                 uid_t uid = (uid_t) -1;
676                 gid_t gid = (gid_t) -1;
677                 char **our_env = NULL, **final_env = NULL;
678                 unsigned n_env = 0;
679                 int saved_stdout = -1, saved_stdin = -1;
680                 bool keep_stdout = false, keep_stdin = false;
681
682                 /* child */
683
684                 reset_all_signal_handlers();
685
686                 if (sigemptyset(&ss) < 0 ||
687                     sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
688                         r = EXIT_SIGNAL_MASK;
689                         goto fail;
690                 }
691
692                 if (setsid() < 0) {
693                         r = EXIT_SETSID;
694                         goto fail;
695                 }
696
697                 umask(context->umask);
698
699                 if (confirm_spawn) {
700                         char response;
701
702                         /* Set up terminal for the question */
703                         if ((r = setup_confirm_stdio(context,
704                                                      &saved_stdin, &saved_stdout)))
705                                 goto fail;
706
707                         /* Now ask the question. */
708                         if (!(line = exec_command_line(command))) {
709                                 r = EXIT_MEMORY;
710                                 goto fail;
711                         }
712
713                         r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
714                         free(line);
715
716                         if (r < 0 || response == 'n') {
717                                 r = EXIT_CONFIRM;
718                                 goto fail;
719                         } else if (response == 's') {
720                                 r = 0;
721                                 goto fail;
722                         }
723
724                         /* Release terminal for the question */
725                         if ((r = restore_conform_stdio(context,
726                                                        &saved_stdin, &saved_stdout,
727                                                        &keep_stdin, &keep_stdout)))
728                                 goto fail;
729                 }
730
731                 if (!keep_stdin)
732                         if (setup_input(context) < 0) {
733                                 r = EXIT_STDIN;
734                                 goto fail;
735                         }
736
737                 if (!keep_stdout)
738                         if (setup_output(context, file_name_from_path(command->path)) < 0) {
739                                 r = EXIT_STDOUT;
740                                 goto fail;
741                         }
742
743                 if (setup_error(context, file_name_from_path(command->path)) < 0) {
744                         r = EXIT_STDERR;
745                         goto fail;
746                 }
747
748                 if (cgroup_bondings)
749                         if ((r = cgroup_bonding_install_list(cgroup_bondings, 0)) < 0) {
750                                 r = EXIT_CGROUP;
751                                 goto fail;
752                         }
753
754                 if (context->oom_adjust_set) {
755                         char t[16];
756
757                         snprintf(t, sizeof(t), "%i", context->oom_adjust);
758                         char_array_0(t);
759
760                         if (write_one_line_file("/proc/self/oom_adj", t) < 0) {
761                                 r = EXIT_OOM_ADJUST;
762                                 goto fail;
763                         }
764                 }
765
766                 if (context->nice_set)
767                         if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
768                                 r = EXIT_NICE;
769                                 goto fail;
770                         }
771
772                 if (context->cpu_sched_set) {
773                         struct sched_param param;
774
775                         zero(param);
776                         param.sched_priority = context->cpu_sched_priority;
777
778                         if (sched_setscheduler(0, context->cpu_sched_policy |
779                                                (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
780                                 r = EXIT_SETSCHEDULER;
781                                 goto fail;
782                         }
783                 }
784
785                 if (context->cpu_affinity_set)
786                         if (sched_setaffinity(0, sizeof(context->cpu_affinity), &context->cpu_affinity) < 0) {
787                                 r = EXIT_CPUAFFINITY;
788                                 goto fail;
789                         }
790
791                 if (context->ioprio_set)
792                         if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
793                                 r = EXIT_IOPRIO;
794                                 goto fail;
795                         }
796
797                 if (context->timer_slack_ns_set)
798                         if (prctl(PR_SET_TIMERSLACK, context->timer_slack_ns_set) < 0) {
799                                 r = EXIT_TIMERSLACK;
800                                 goto fail;
801                         }
802
803                 if (context->user) {
804                         username = context->user;
805                         if (get_user_creds(&username, &uid, &gid, &home) < 0) {
806                                 r = EXIT_USER;
807                                 goto fail;
808                         }
809                 }
810
811                 if (apply_permissions)
812                         if (enforce_groups(context, username, uid) < 0) {
813                                 r = EXIT_GROUP;
814                                 goto fail;
815                         }
816
817                 if (apply_chroot) {
818                         if (context->root_directory)
819                                 if (chroot(context->root_directory) < 0) {
820                                         r = EXIT_CHROOT;
821                                         goto fail;
822                                 }
823
824                         if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
825                                 r = EXIT_CHDIR;
826                                 goto fail;
827                         }
828                 } else {
829
830                         char *d;
831
832                         if (asprintf(&d, "%s/%s",
833                                      context->root_directory ? context->root_directory : "",
834                                      context->working_directory ? context->working_directory : "") < 0) {
835                                 r = EXIT_MEMORY;
836                                 goto fail;
837                         }
838
839                         if (chdir(d) < 0) {
840                                 free(d);
841                                 r = EXIT_CHDIR;
842                                 goto fail;
843                         }
844
845                         free(d);
846                 }
847
848                 if (close_all_fds(fds, n_fds) < 0 ||
849                     shift_fds(fds, n_fds) < 0 ||
850                     flags_fds(fds, n_fds, context->non_blocking) < 0) {
851                         r = EXIT_FDS;
852                         goto fail;
853                 }
854
855                 if (apply_permissions) {
856
857                         for (i = 0; i < RLIMIT_NLIMITS; i++) {
858                                 if (!context->rlimit[i])
859                                         continue;
860
861                                 if (setrlimit(i, context->rlimit[i]) < 0) {
862                                         r = EXIT_LIMITS;
863                                         goto fail;
864                                 }
865                         }
866
867                         if (context->user)
868                                 if (enforce_user(context, uid) < 0) {
869                                         r = EXIT_USER;
870                                         goto fail;
871                                 }
872
873                         /* PR_GET_SECUREBITS is not priviliged, while
874                          * PR_SET_SECUREBITS is. So to suppress
875                          * potential EPERMs we'll try not to call
876                          * PR_SET_SECUREBITS unless necessary. */
877                         if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
878                                 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
879                                         r = EXIT_SECUREBITS;
880                                         goto fail;
881                                 }
882
883                         if (context->capabilities)
884                                 if (cap_set_proc(context->capabilities) < 0) {
885                                         r = EXIT_CAPABILITIES;
886                                         goto fail;
887                                 }
888                 }
889
890                 if (!(our_env = new0(char*, 6))) {
891                         r = EXIT_MEMORY;
892                         goto fail;
893                 }
894
895                 if (n_fds > 0)
896                         if (asprintf(our_env + n_env++, "LISTEN_PID=%llu", (unsigned long long) getpid()) < 0 ||
897                             asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
898                                 r = EXIT_MEMORY;
899                                 goto fail;
900                         }
901
902                 if (home)
903                         if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
904                                 r = EXIT_MEMORY;
905                                 goto fail;
906                         }
907
908                 if (username)
909                         if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
910                             asprintf(our_env + n_env++, "USER=%s", username) < 0) {
911                                 r = EXIT_MEMORY;
912                                 goto fail;
913                         }
914
915                 if (!(final_env = strv_env_merge(environ, our_env, context->environment, NULL))) {
916                         r = EXIT_MEMORY;
917                         goto fail;
918                 }
919
920                 execve(command->path, command->argv, final_env);
921                 r = EXIT_EXEC;
922
923         fail:
924                 strv_free(our_env);
925                 strv_free(final_env);
926
927                 if (saved_stdin >= 0)
928                         close_nointr_nofail(saved_stdin);
929
930                 if (saved_stdout >= 0)
931                         close_nointr_nofail(saved_stdout);
932
933                 _exit(r);
934         }
935
936         /* We add the new process to the cgroup both in the child (so
937          * that we can be sure that no user code is ever executed
938          * outside of the cgroup) and in the parent (so that we can be
939          * sure that when we kill the cgroup the process will be
940          * killed too). */
941         if (cgroup_bondings)
942                 if ((r = cgroup_bonding_install_list(cgroup_bondings, pid)) < 0) {
943                         r = EXIT_CGROUP;
944                         goto fail;
945                 }
946
947         log_debug("Forked %s as %llu", command->path, (unsigned long long) pid);
948
949         command->exec_status.pid = pid;
950         command->exec_status.start_timestamp = now(CLOCK_REALTIME);
951
952         *ret = pid;
953         return 0;
954 }
955
956 void exec_context_init(ExecContext *c) {
957         assert(c);
958
959         c->umask = 0002;
960         c->oom_adjust = 0;
961         c->oom_adjust_set = false;
962         c->nice = 0;
963         c->nice_set = false;
964         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
965         c->ioprio_set = false;
966         c->cpu_sched_policy = SCHED_OTHER;
967         c->cpu_sched_priority = 0;
968         c->cpu_sched_set = false;
969         CPU_ZERO(&c->cpu_affinity);
970         c->cpu_affinity_set = false;
971         c->timer_slack_ns = 0;
972         c->timer_slack_ns_set = false;
973
974         c->cpu_sched_reset_on_fork = false;
975         c->non_blocking = false;
976
977         c->std_input = 0;
978         c->std_output = 0;
979         c->std_error = 0;
980         c->syslog_priority = LOG_DAEMON|LOG_INFO;
981
982         c->secure_bits = 0;
983         c->capability_bounding_set_drop = 0;
984 }
985
986 void exec_context_done(ExecContext *c) {
987         unsigned l;
988
989         assert(c);
990
991         strv_free(c->environment);
992         c->environment = NULL;
993
994         for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
995                 free(c->rlimit[l]);
996                 c->rlimit[l] = NULL;
997         }
998
999         free(c->working_directory);
1000         c->working_directory = NULL;
1001         free(c->root_directory);
1002         c->root_directory = NULL;
1003
1004         free(c->tty_path);
1005         c->tty_path = NULL;
1006
1007         free(c->syslog_identifier);
1008         c->syslog_identifier = NULL;
1009
1010         free(c->user);
1011         c->user = NULL;
1012
1013         free(c->group);
1014         c->group = NULL;
1015
1016         strv_free(c->supplementary_groups);
1017         c->supplementary_groups = NULL;
1018
1019         if (c->capabilities) {
1020                 cap_free(c->capabilities);
1021                 c->capabilities = NULL;
1022         }
1023 }
1024
1025 void exec_command_done(ExecCommand *c) {
1026         assert(c);
1027
1028         free(c->path);
1029         c->path = NULL;
1030
1031         strv_free(c->argv);
1032         c->argv = NULL;
1033 }
1034
1035 void exec_command_done_array(ExecCommand *c, unsigned n) {
1036         unsigned i;
1037
1038         for (i = 0; i < n; i++)
1039                 exec_command_done(c+i);
1040 }
1041
1042 void exec_command_free_list(ExecCommand *c) {
1043         ExecCommand *i;
1044
1045         while ((i = c)) {
1046                 LIST_REMOVE(ExecCommand, command, c, i);
1047                 exec_command_done(i);
1048                 free(i);
1049         }
1050 }
1051
1052 void exec_command_free_array(ExecCommand **c, unsigned n) {
1053         unsigned i;
1054
1055         for (i = 0; i < n; i++) {
1056                 exec_command_free_list(c[i]);
1057                 c[i] = NULL;
1058         }
1059 }
1060
1061 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1062         char ** e;
1063         unsigned i;
1064
1065         assert(c);
1066         assert(f);
1067
1068         if (!prefix)
1069                 prefix = "";
1070
1071         fprintf(f,
1072                 "%sUMask: %04o\n"
1073                 "%sWorkingDirectory: %s\n"
1074                 "%sRootDirectory: %s\n"
1075                 "%sNonBlocking: %s\n",
1076                 prefix, c->umask,
1077                 prefix, c->working_directory ? c->working_directory : "/",
1078                 prefix, c->root_directory ? c->root_directory : "/",
1079                 prefix, yes_no(c->non_blocking));
1080
1081         if (c->environment)
1082                 for (e = c->environment; *e; e++)
1083                         fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1084
1085         if (c->nice_set)
1086                 fprintf(f,
1087                         "%sNice: %i\n",
1088                         prefix, c->nice);
1089
1090         if (c->oom_adjust_set)
1091                 fprintf(f,
1092                         "%sOOMAdjust: %i\n",
1093                         prefix, c->oom_adjust);
1094
1095         for (i = 0; i < RLIM_NLIMITS; i++)
1096                 if (c->rlimit[i])
1097                         fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1098
1099         if (c->ioprio_set)
1100                 fprintf(f,
1101                         "%sIOSchedulingClass: %s\n"
1102                         "%sIOPriority: %i\n",
1103                         prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1104                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1105
1106         if (c->cpu_sched_set)
1107                 fprintf(f,
1108                         "%sCPUSchedulingPolicy: %s\n"
1109                         "%sCPUSchedulingPriority: %i\n"
1110                         "%sCPUSchedulingResetOnFork: %s\n",
1111                         prefix, sched_policy_to_string(c->cpu_sched_policy),
1112                         prefix, c->cpu_sched_priority,
1113                         prefix, yes_no(c->cpu_sched_reset_on_fork));
1114
1115         if (c->cpu_affinity_set) {
1116                 fprintf(f, "%sCPUAffinity:", prefix);
1117                 for (i = 0; i < CPU_SETSIZE; i++)
1118                         if (CPU_ISSET(i, &c->cpu_affinity))
1119                                 fprintf(f, " %i", i);
1120                 fputs("\n", f);
1121         }
1122
1123         if (c->timer_slack_ns_set)
1124                 fprintf(f, "%sTimerSlackNS: %lu\n", prefix, c->timer_slack_ns);
1125
1126         fprintf(f,
1127                 "%sStandardInput: %s\n"
1128                 "%sStandardOutput: %s\n"
1129                 "%sStandardError: %s\n",
1130                 prefix, exec_input_to_string(c->std_input),
1131                 prefix, exec_output_to_string(c->std_output),
1132                 prefix, exec_output_to_string(c->std_error));
1133
1134         if (c->tty_path)
1135                 fprintf(f,
1136                         "%sTTYPath: %s\n",
1137                         prefix, c->tty_path);
1138
1139         if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KERNEL ||
1140             c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KERNEL)
1141                 fprintf(f,
1142                         "%sSyslogFacility: %s\n"
1143                         "%sSyslogLevel: %s\n",
1144                         prefix, log_facility_to_string(LOG_FAC(c->syslog_priority)),
1145                         prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1146
1147         if (c->capabilities) {
1148                 char *t;
1149                 if ((t = cap_to_text(c->capabilities, NULL))) {
1150                         fprintf(f, "%sCapabilities: %s\n",
1151                                 prefix, t);
1152                         cap_free(t);
1153                 }
1154         }
1155
1156         if (c->secure_bits)
1157                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1158                         prefix,
1159                         (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1160                         (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1161                         (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1162                         (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1163                         (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1164                         (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1165
1166         if (c->capability_bounding_set_drop) {
1167                 fprintf(f, "%sCapabilityBoundingSetDrop:", prefix);
1168
1169                 for (i = 0; i <= CAP_LAST_CAP; i++)
1170                         if (c->capability_bounding_set_drop & (1 << i)) {
1171                                 char *t;
1172
1173                                 if ((t = cap_to_name(i))) {
1174                                         fprintf(f, " %s", t);
1175                                         free(t);
1176                                 }
1177                         }
1178
1179                 fputs("\n", f);
1180         }
1181
1182         if (c->user)
1183                 fprintf(f, "%sUser: %s", prefix, c->user);
1184         if (c->group)
1185                 fprintf(f, "%sGroup: %s", prefix, c->group);
1186
1187         if (c->supplementary_groups) {
1188                 char **g;
1189
1190                 fprintf(f, "%sSupplementaryGroups:", prefix);
1191
1192                 STRV_FOREACH(g, c->supplementary_groups)
1193                         fprintf(f, " %s", *g);
1194
1195                 fputs("\n", f);
1196         }
1197 }
1198
1199 void exec_status_fill(ExecStatus *s, pid_t pid, int code, int status) {
1200         assert(s);
1201
1202         s->pid = pid;
1203         s->exit_timestamp = now(CLOCK_REALTIME);
1204
1205         s->code = code;
1206         s->status = status;
1207 }
1208
1209 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1210         char buf[FORMAT_TIMESTAMP_MAX];
1211
1212         assert(s);
1213         assert(f);
1214
1215         if (!prefix)
1216                 prefix = "";
1217
1218         if (s->pid <= 0)
1219                 return;
1220
1221         fprintf(f,
1222                 "%sPID: %llu\n",
1223                 prefix, (unsigned long long) s->pid);
1224
1225         if (s->start_timestamp > 0)
1226                 fprintf(f,
1227                         "%sStart Timestamp: %s\n",
1228                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp));
1229
1230         if (s->exit_timestamp > 0)
1231                 fprintf(f,
1232                         "%sExit Timestamp: %s\n"
1233                         "%sExit Code: %s\n"
1234                         "%sExit Status: %i\n",
1235                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp),
1236                         prefix, sigchld_code_to_string(s->code),
1237                         prefix, s->status);
1238 }
1239
1240 char *exec_command_line(ExecCommand *c) {
1241         size_t k;
1242         char *n, *p, **a;
1243         bool first = true;
1244
1245         assert(c);
1246         assert(c->argv);
1247
1248         k = 1;
1249         STRV_FOREACH(a, c->argv)
1250                 k += strlen(*a)+3;
1251
1252         if (!(n = new(char, k)))
1253                 return NULL;
1254
1255         p = n;
1256         STRV_FOREACH(a, c->argv) {
1257
1258                 if (!first)
1259                         *(p++) = ' ';
1260                 else
1261                         first = false;
1262
1263                 if (strpbrk(*a, WHITESPACE)) {
1264                         *(p++) = '\'';
1265                         p = stpcpy(p, *a);
1266                         *(p++) = '\'';
1267                 } else
1268                         p = stpcpy(p, *a);
1269
1270         }
1271
1272         *p = 0;
1273
1274         /* FIXME: this doesn't really handle arguments that have
1275          * spaces and ticks in them */
1276
1277         return n;
1278 }
1279
1280 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1281         char *p2;
1282         const char *prefix2;
1283
1284         char *cmd;
1285
1286         assert(c);
1287         assert(f);
1288
1289         if (!prefix)
1290                 prefix = "";
1291         p2 = strappend(prefix, "\t");
1292         prefix2 = p2 ? p2 : prefix;
1293
1294         cmd = exec_command_line(c);
1295
1296         fprintf(f,
1297                 "%sCommand Line: %s\n",
1298                 prefix, cmd ? cmd : strerror(ENOMEM));
1299
1300         free(cmd);
1301
1302         exec_status_dump(&c->exec_status, f, prefix2);
1303
1304         free(p2);
1305 }
1306
1307 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
1308         assert(f);
1309
1310         if (!prefix)
1311                 prefix = "";
1312
1313         LIST_FOREACH(command, c, c)
1314                 exec_command_dump(c, f, prefix);
1315 }
1316
1317 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
1318         ExecCommand *end;
1319
1320         assert(l);
1321         assert(e);
1322
1323         if (*l) {
1324                 /* It's kinda important that we keep the order here */
1325                 LIST_FIND_TAIL(ExecCommand, command, *l, end);
1326                 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
1327         } else
1328               *l = e;
1329 }
1330
1331 int exec_command_set(ExecCommand *c, const char *path, ...) {
1332         va_list ap;
1333         char **l, *p;
1334
1335         assert(c);
1336         assert(path);
1337
1338         va_start(ap, path);
1339         l = strv_new_ap(path, ap);
1340         va_end(ap);
1341
1342         if (!l)
1343                 return -ENOMEM;
1344
1345         if (!(p = strdup(path))) {
1346                 strv_free(l);
1347                 return -ENOMEM;
1348         }
1349
1350         free(c->path);
1351         c->path = p;
1352
1353         strv_free(c->argv);
1354         c->argv = l;
1355
1356         return 0;
1357 }
1358
1359 const char* exit_status_to_string(ExitStatus status) {
1360         switch (status) {
1361
1362         case EXIT_SUCCESS:
1363                 return "SUCCESS";
1364
1365         case EXIT_FAILURE:
1366                 return "FAILURE";
1367
1368         case EXIT_INVALIDARGUMENT:
1369                 return "INVALIDARGUMENT";
1370
1371         case EXIT_NOTIMPLEMENTED:
1372                 return "NOTIMPLEMENTED";
1373
1374         case EXIT_NOPERMISSION:
1375                 return "NOPERMISSION";
1376
1377         case EXIT_NOTINSTALLED:
1378                 return "NOTINSSTALLED";
1379
1380         case EXIT_NOTCONFIGURED:
1381                 return "NOTCONFIGURED";
1382
1383         case EXIT_NOTRUNNING:
1384                 return "NOTRUNNING";
1385
1386         case EXIT_CHDIR:
1387                 return "CHDIR";
1388
1389         case EXIT_NICE:
1390                 return "NICE";
1391
1392         case EXIT_FDS:
1393                 return "FDS";
1394
1395         case EXIT_EXEC:
1396                 return "EXEC";
1397
1398         case EXIT_MEMORY:
1399                 return "MEMORY";
1400
1401         case EXIT_LIMITS:
1402                 return "LIMITS";
1403
1404         case EXIT_OOM_ADJUST:
1405                 return "OOM_ADJUST";
1406
1407         case EXIT_SIGNAL_MASK:
1408                 return "SIGNAL_MASK";
1409
1410         case EXIT_STDIN:
1411                 return "STDIN";
1412
1413         case EXIT_STDOUT:
1414                 return "STDOUT";
1415
1416         case EXIT_CHROOT:
1417                 return "CHROOT";
1418
1419         case EXIT_IOPRIO:
1420                 return "IOPRIO";
1421
1422         case EXIT_TIMERSLACK:
1423                 return "TIMERSLACK";
1424
1425         case EXIT_SECUREBITS:
1426                 return "SECUREBITS";
1427
1428         case EXIT_SETSCHEDULER:
1429                 return "SETSCHEDULER";
1430
1431         case EXIT_CPUAFFINITY:
1432                 return "CPUAFFINITY";
1433
1434         case EXIT_GROUP:
1435                 return "GROUP";
1436
1437         case EXIT_USER:
1438                 return "USER";
1439
1440         case EXIT_CAPABILITIES:
1441                 return "CAPABILITIES";
1442
1443         case EXIT_CGROUP:
1444                 return "CGROUP";
1445
1446         case EXIT_SETSID:
1447                 return "SETSID";
1448
1449         case EXIT_CONFIRM:
1450                 return "CONFIRM";
1451
1452         case EXIT_STDERR:
1453                 return "STDERR";
1454
1455         default:
1456                 return NULL;
1457         }
1458 }
1459
1460 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
1461         [EXEC_INPUT_NULL] = "null",
1462         [EXEC_INPUT_TTY] = "tty",
1463         [EXEC_INPUT_TTY_FORCE] = "tty-force",
1464         [EXEC_INPUT_TTY_FAIL] = "tty-fail"
1465 };
1466
1467 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
1468         [EXEC_OUTPUT_INHERIT] = "inherit",
1469         [EXEC_OUTPUT_NULL] = "null",
1470         [EXEC_OUTPUT_TTY] = "tty",
1471         [EXEC_OUTPUT_SYSLOG] = "syslog",
1472         [EXEC_OUTPUT_KERNEL] = "kernel"
1473 };
1474
1475 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
1476
1477 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);