chiark / gitweb /
30437425c4ac955cb8b75e74b950ee4f8f97cf0f
[elogind.git] / src / core / manager.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <string.h>
25 #include <sys/epoll.h>
26 #include <signal.h>
27 #include <sys/signalfd.h>
28 #include <sys/wait.h>
29 #include <unistd.h>
30 #include <sys/poll.h>
31 #include <sys/reboot.h>
32 #include <sys/ioctl.h>
33 #include <linux/kd.h>
34 #include <termios.h>
35 #include <fcntl.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39
40 #ifdef HAVE_AUDIT
41 #include <libaudit.h>
42 #endif
43
44 #include <systemd/sd-daemon.h>
45
46 #include "manager.h"
47 #include "transaction.h"
48 #include "hashmap.h"
49 #include "macro.h"
50 #include "strv.h"
51 #include "log.h"
52 #include "util.h"
53 #include "mkdir.h"
54 #include "ratelimit.h"
55 #include "cgroup.h"
56 #include "mount-setup.h"
57 #include "unit-name.h"
58 #include "dbus-unit.h"
59 #include "dbus-job.h"
60 #include "missing.h"
61 #include "path-lookup.h"
62 #include "special.h"
63 #include "bus-errors.h"
64 #include "exit-status.h"
65 #include "virt.h"
66 #include "watchdog.h"
67 #include "cgroup-util.h"
68 #include "path-util.h"
69
70 /* As soon as 16 units are in our GC queue, make sure to run a gc sweep */
71 #define GC_QUEUE_ENTRIES_MAX 16
72
73 /* As soon as 10s passed since a unit was added to our GC queue, make sure to run a gc sweep */
74 #define GC_QUEUE_USEC_MAX (10*USEC_PER_SEC)
75
76 /* Where clients shall send notification messages to */
77 #define NOTIFY_SOCKET_SYSTEM "/run/systemd/notify"
78 #define NOTIFY_SOCKET_USER "@/org/freedesktop/systemd1/notify"
79
80 static int manager_setup_notify(Manager *m) {
81         union {
82                 struct sockaddr sa;
83                 struct sockaddr_un un;
84         } sa;
85         struct epoll_event ev;
86         int one = 1, r;
87         mode_t u;
88
89         assert(m);
90
91         m->notify_watch.type = WATCH_NOTIFY;
92         if ((m->notify_watch.fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
93                 log_error("Failed to allocate notification socket: %m");
94                 return -errno;
95         }
96
97         zero(sa);
98         sa.sa.sa_family = AF_UNIX;
99
100         if (getpid() != 1)
101                 snprintf(sa.un.sun_path, sizeof(sa.un.sun_path), NOTIFY_SOCKET_USER "/%llu", random_ull());
102         else {
103                 unlink(NOTIFY_SOCKET_SYSTEM);
104                 strncpy(sa.un.sun_path, NOTIFY_SOCKET_SYSTEM, sizeof(sa.un.sun_path));
105         }
106
107         if (sa.un.sun_path[0] == '@')
108                 sa.un.sun_path[0] = 0;
109
110         u = umask(0111);
111         r = bind(m->notify_watch.fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1));
112         umask(u);
113
114         if (r < 0) {
115                 log_error("bind() failed: %m");
116                 return -errno;
117         }
118
119         if (setsockopt(m->notify_watch.fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one)) < 0) {
120                 log_error("SO_PASSCRED failed: %m");
121                 return -errno;
122         }
123
124         zero(ev);
125         ev.events = EPOLLIN;
126         ev.data.ptr = &m->notify_watch;
127
128         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->notify_watch.fd, &ev) < 0)
129                 return -errno;
130
131         if (sa.un.sun_path[0] == 0)
132                 sa.un.sun_path[0] = '@';
133
134         if (!(m->notify_socket = strdup(sa.un.sun_path)))
135                 return -ENOMEM;
136
137         log_debug("Using notification socket %s", m->notify_socket);
138
139         return 0;
140 }
141
142 static int enable_special_signals(Manager *m) {
143         int fd;
144
145         assert(m);
146
147         /* Enable that we get SIGINT on control-alt-del. In containers
148          * this will fail with EPERM, so ignore that. */
149         if (reboot(RB_DISABLE_CAD) < 0 && errno != EPERM)
150                 log_warning("Failed to enable ctrl-alt-del handling: %m");
151
152         fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
153         if (fd < 0) {
154                 /* Support systems without virtual console */
155                 if (fd != -ENOENT)
156                         log_warning("Failed to open /dev/tty0: %m");
157         } else {
158                 /* Enable that we get SIGWINCH on kbrequest */
159                 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
160                         log_warning("Failed to enable kbrequest handling: %s", strerror(errno));
161
162                 close_nointr_nofail(fd);
163         }
164
165         return 0;
166 }
167
168 static int manager_setup_signals(Manager *m) {
169         sigset_t mask;
170         struct epoll_event ev;
171         struct sigaction sa;
172
173         assert(m);
174
175         /* We are not interested in SIGSTOP and friends. */
176         zero(sa);
177         sa.sa_handler = SIG_DFL;
178         sa.sa_flags = SA_NOCLDSTOP|SA_RESTART;
179         assert_se(sigaction(SIGCHLD, &sa, NULL) == 0);
180
181         assert_se(sigemptyset(&mask) == 0);
182
183         sigset_add_many(&mask,
184                         SIGCHLD,     /* Child died */
185                         SIGTERM,     /* Reexecute daemon */
186                         SIGHUP,      /* Reload configuration */
187                         SIGUSR1,     /* systemd/upstart: reconnect to D-Bus */
188                         SIGUSR2,     /* systemd: dump status */
189                         SIGINT,      /* Kernel sends us this on control-alt-del */
190                         SIGWINCH,    /* Kernel sends us this on kbrequest (alt-arrowup) */
191                         SIGPWR,      /* Some kernel drivers and upsd send us this on power failure */
192                         SIGRTMIN+0,  /* systemd: start default.target */
193                         SIGRTMIN+1,  /* systemd: isolate rescue.target */
194                         SIGRTMIN+2,  /* systemd: isolate emergency.target */
195                         SIGRTMIN+3,  /* systemd: start halt.target */
196                         SIGRTMIN+4,  /* systemd: start poweroff.target */
197                         SIGRTMIN+5,  /* systemd: start reboot.target */
198                         SIGRTMIN+6,  /* systemd: start kexec.target */
199                         SIGRTMIN+13, /* systemd: Immediate halt */
200                         SIGRTMIN+14, /* systemd: Immediate poweroff */
201                         SIGRTMIN+15, /* systemd: Immediate reboot */
202                         SIGRTMIN+16, /* systemd: Immediate kexec */
203                         SIGRTMIN+20, /* systemd: enable status messages */
204                         SIGRTMIN+21, /* systemd: disable status messages */
205                         SIGRTMIN+22, /* systemd: set log level to LOG_DEBUG */
206                         SIGRTMIN+23, /* systemd: set log level to LOG_INFO */
207                         SIGRTMIN+26, /* systemd: set log target to journal-or-kmsg */
208                         SIGRTMIN+27, /* systemd: set log target to console */
209                         SIGRTMIN+28, /* systemd: set log target to kmsg */
210                         SIGRTMIN+29, /* systemd: set log target to syslog-or-kmsg */
211                         -1);
212         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
213
214         m->signal_watch.type = WATCH_SIGNAL;
215         if ((m->signal_watch.fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0)
216                 return -errno;
217
218         zero(ev);
219         ev.events = EPOLLIN;
220         ev.data.ptr = &m->signal_watch;
221
222         if (epoll_ctl(m->epoll_fd, EPOLL_CTL_ADD, m->signal_watch.fd, &ev) < 0)
223                 return -errno;
224
225         if (m->running_as == MANAGER_SYSTEM)
226                 return enable_special_signals(m);
227
228         return 0;
229 }
230
231 static void manager_strip_environment(Manager *m) {
232         assert(m);
233
234         /* Remove variables from the inherited set that are part of
235          * the container interface:
236          * http://www.freedesktop.org/wiki/Software/systemd/ContainerInterface */
237         strv_remove_prefix(m->environment, "container=");
238         strv_remove_prefix(m->environment, "container_");
239
240         /* Remove variables from the inherited set that are part of
241          * the initrd interface:
242          * http://www.freedesktop.org/wiki/Software/systemd/InitrdInterface */
243         strv_remove_prefix(m->environment, "RD_");
244 }
245
246 int manager_new(ManagerRunningAs running_as, Manager **_m) {
247         Manager *m;
248         int r = -ENOMEM;
249
250         assert(_m);
251         assert(running_as >= 0);
252         assert(running_as < _MANAGER_RUNNING_AS_MAX);
253
254         if (!(m = new0(Manager, 1)))
255                 return -ENOMEM;
256
257         dual_timestamp_get(&m->startup_timestamp);
258
259         m->running_as = running_as;
260         m->name_data_slot = m->conn_data_slot = m->subscribed_data_slot = -1;
261         m->exit_code = _MANAGER_EXIT_CODE_INVALID;
262         m->pin_cgroupfs_fd = -1;
263         m->idle_pipe[0] = m->idle_pipe[1] = -1;
264
265 #ifdef HAVE_AUDIT
266         m->audit_fd = -1;
267 #endif
268
269         m->signal_watch.fd = m->mount_watch.fd = m->udev_watch.fd = m->epoll_fd = m->dev_autofs_fd = m->swap_watch.fd = -1;
270         m->current_job_id = 1; /* start as id #1, so that we can leave #0 around as "null-like" value */
271
272         m->environment = strv_copy(environ);
273         if (!m->environment)
274                 goto fail;
275
276         manager_strip_environment(m);
277
278         if (running_as == MANAGER_SYSTEM) {
279                 m->default_controllers = strv_new("cpu", NULL);
280                 if (!m->default_controllers)
281                         goto fail;
282         }
283
284         if (!(m->units = hashmap_new(string_hash_func, string_compare_func)))
285                 goto fail;
286
287         if (!(m->jobs = hashmap_new(trivial_hash_func, trivial_compare_func)))
288                 goto fail;
289
290         if (!(m->watch_pids = hashmap_new(trivial_hash_func, trivial_compare_func)))
291                 goto fail;
292
293         if (!(m->cgroup_bondings = hashmap_new(string_hash_func, string_compare_func)))
294                 goto fail;
295
296         if (!(m->watch_bus = hashmap_new(string_hash_func, string_compare_func)))
297                 goto fail;
298
299         if ((m->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0)
300                 goto fail;
301
302         if ((r = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
303                 goto fail;
304
305         if ((r = manager_setup_signals(m)) < 0)
306                 goto fail;
307
308         if ((r = manager_setup_cgroup(m)) < 0)
309                 goto fail;
310
311         if ((r = manager_setup_notify(m)) < 0)
312                 goto fail;
313
314         /* Try to connect to the busses, if possible. */
315         if ((r = bus_init(m, running_as != MANAGER_SYSTEM)) < 0)
316                 goto fail;
317
318 #ifdef HAVE_AUDIT
319         if ((m->audit_fd = audit_open()) < 0 &&
320             /* If the kernel lacks netlink or audit support,
321              * don't worry about it. */
322             errno != EAFNOSUPPORT && errno != EPROTONOSUPPORT)
323                 log_error("Failed to connect to audit log: %m");
324 #endif
325
326         m->taint_usr = dir_is_empty("/usr") > 0;
327
328         *_m = m;
329         return 0;
330
331 fail:
332         manager_free(m);
333         return r;
334 }
335
336 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
337         Unit *u;
338         unsigned n = 0;
339
340         assert(m);
341
342         while ((u = m->cleanup_queue)) {
343                 assert(u->in_cleanup_queue);
344
345                 unit_free(u);
346                 n++;
347         }
348
349         return n;
350 }
351
/* Offsets that are added to the current GC marker value to encode the
 * state a unit was found in during a GC sweep. */
enum {
        GC_OFFSET_IN_PATH = 0,  /* This one is on the path we were traveling */
        GC_OFFSET_UNSURE  = 1,  /* No clue */
        GC_OFFSET_GOOD    = 2,  /* We still need this unit */
        GC_OFFSET_BAD     = 3,  /* We don't need this unit anymore */
        _GC_OFFSET_MAX    = 4   /* Number of offsets, not an offset itself */
};
359
360 static void unit_gc_sweep(Unit *u, unsigned gc_marker) {
361         Iterator i;
362         Unit *other;
363         bool is_bad;
364
365         assert(u);
366
367         if (u->gc_marker == gc_marker + GC_OFFSET_GOOD ||
368             u->gc_marker == gc_marker + GC_OFFSET_BAD ||
369             u->gc_marker == gc_marker + GC_OFFSET_IN_PATH)
370                 return;
371
372         if (u->in_cleanup_queue)
373                 goto bad;
374
375         if (unit_check_gc(u))
376                 goto good;
377
378         u->gc_marker = gc_marker + GC_OFFSET_IN_PATH;
379
380         is_bad = true;
381
382         SET_FOREACH(other, u->dependencies[UNIT_REFERENCED_BY], i) {
383                 unit_gc_sweep(other, gc_marker);
384
385                 if (other->gc_marker == gc_marker + GC_OFFSET_GOOD)
386                         goto good;
387
388                 if (other->gc_marker != gc_marker + GC_OFFSET_BAD)
389                         is_bad = false;
390         }
391
392         if (is_bad)
393                 goto bad;
394
395         /* We were unable to find anything out about this entry, so
396          * let's investigate it later */
397         u->gc_marker = gc_marker + GC_OFFSET_UNSURE;
398         unit_add_to_gc_queue(u);
399         return;
400
401 bad:
402         /* We definitely know that this one is not useful anymore, so
403          * let's mark it for deletion */
404         u->gc_marker = gc_marker + GC_OFFSET_BAD;
405         unit_add_to_cleanup_queue(u);
406         return;
407
408 good:
409         u->gc_marker = gc_marker + GC_OFFSET_GOOD;
410 }
411
412 static unsigned manager_dispatch_gc_queue(Manager *m) {
413         Unit *u;
414         unsigned n = 0;
415         unsigned gc_marker;
416
417         assert(m);
418
419         if ((m->n_in_gc_queue < GC_QUEUE_ENTRIES_MAX) &&
420             (m->gc_queue_timestamp <= 0 ||
421              (m->gc_queue_timestamp + GC_QUEUE_USEC_MAX) > now(CLOCK_MONOTONIC)))
422                 return 0;
423
424         log_debug("Running GC...");
425
426         m->gc_marker += _GC_OFFSET_MAX;
427         if (m->gc_marker + _GC_OFFSET_MAX <= _GC_OFFSET_MAX)
428                 m->gc_marker = 1;
429
430         gc_marker = m->gc_marker;
431
432         while ((u = m->gc_queue)) {
433                 assert(u->in_gc_queue);
434
435                 unit_gc_sweep(u, gc_marker);
436
437                 LIST_REMOVE(Unit, gc_queue, m->gc_queue, u);
438                 u->in_gc_queue = false;
439
440                 n++;
441
442                 if (u->gc_marker == gc_marker + GC_OFFSET_BAD ||
443                     u->gc_marker == gc_marker + GC_OFFSET_UNSURE) {
444                         log_debug("Collecting %s", u->id);
445                         u->gc_marker = gc_marker + GC_OFFSET_BAD;
446                         unit_add_to_cleanup_queue(u);
447                 }
448         }
449
450         m->n_in_gc_queue = 0;
451         m->gc_queue_timestamp = 0;
452
453         return n;
454 }
455
456 static void manager_clear_jobs_and_units(Manager *m) {
457         Unit *u;
458
459         assert(m);
460
461         while ((u = hashmap_first(m->units)))
462                 unit_free(u);
463
464         manager_dispatch_cleanup_queue(m);
465
466         assert(!m->load_queue);
467         assert(!m->run_queue);
468         assert(!m->dbus_unit_queue);
469         assert(!m->dbus_job_queue);
470         assert(!m->cleanup_queue);
471         assert(!m->gc_queue);
472
473         assert(hashmap_isempty(m->jobs));
474         assert(hashmap_isempty(m->units));
475 }
476
477 void manager_free(Manager *m) {
478         UnitType c;
479         int i;
480
481         assert(m);
482
483         manager_clear_jobs_and_units(m);
484
485         for (c = 0; c < _UNIT_TYPE_MAX; c++)
486                 if (unit_vtable[c]->shutdown)
487                         unit_vtable[c]->shutdown(m);
488
489         /* If we reexecute ourselves, we keep the root cgroup
490          * around */
491         manager_shutdown_cgroup(m, m->exit_code != MANAGER_REEXECUTE);
492
493         manager_undo_generators(m);
494
495         bus_done(m);
496
497         hashmap_free(m->units);
498         hashmap_free(m->jobs);
499         hashmap_free(m->watch_pids);
500         hashmap_free(m->watch_bus);
501
502         if (m->epoll_fd >= 0)
503                 close_nointr_nofail(m->epoll_fd);
504         if (m->signal_watch.fd >= 0)
505                 close_nointr_nofail(m->signal_watch.fd);
506         if (m->notify_watch.fd >= 0)
507                 close_nointr_nofail(m->notify_watch.fd);
508
509 #ifdef HAVE_AUDIT
510         if (m->audit_fd >= 0)
511                 audit_close(m->audit_fd);
512 #endif
513
514         free(m->notify_socket);
515
516         lookup_paths_free(&m->lookup_paths);
517         strv_free(m->environment);
518
519         strv_free(m->default_controllers);
520
521         hashmap_free(m->cgroup_bondings);
522         set_free_free(m->unit_path_cache);
523
524         close_pipe(m->idle_pipe);
525
526         free(m->switch_root);
527         free(m->switch_root_init);
528
529         for (i = 0; i < RLIMIT_NLIMITS; i++)
530                 free(m->rlimit[i]);
531
532         free(m);
533 }
534
535 int manager_enumerate(Manager *m) {
536         int r = 0, q;
537         UnitType c;
538
539         assert(m);
540
541         /* Let's ask every type to load all units from disk/kernel
542          * that it might know */
543         for (c = 0; c < _UNIT_TYPE_MAX; c++)
544                 if (unit_vtable[c]->enumerate)
545                         if ((q = unit_vtable[c]->enumerate(m)) < 0)
546                                 r = q;
547
548         manager_dispatch_load_queue(m);
549         return r;
550 }
551
552 int manager_coldplug(Manager *m) {
553         int r = 0, q;
554         Iterator i;
555         Unit *u;
556         char *k;
557
558         assert(m);
559
560         /* Then, let's set up their initial state. */
561         HASHMAP_FOREACH_KEY(u, k, m->units, i) {
562
563                 /* ignore aliases */
564                 if (u->id != k)
565                         continue;
566
567                 if ((q = unit_coldplug(u)) < 0)
568                         r = q;
569         }
570
571         return r;
572 }
573
574 static void manager_build_unit_path_cache(Manager *m) {
575         char **i;
576         DIR *d = NULL;
577         int r;
578
579         assert(m);
580
581         set_free_free(m->unit_path_cache);
582
583         if (!(m->unit_path_cache = set_new(string_hash_func, string_compare_func))) {
584                 log_error("Failed to allocate unit path cache.");
585                 return;
586         }
587
588         /* This simply builds a list of files we know exist, so that
589          * we don't always have to go to disk */
590
591         STRV_FOREACH(i, m->lookup_paths.unit_path) {
592                 struct dirent *de;
593
594                 if (!(d = opendir(*i))) {
595                         log_error("Failed to open directory: %m");
596                         continue;
597                 }
598
599                 while ((de = readdir(d))) {
600                         char *p;
601
602                         if (ignore_file(de->d_name))
603                                 continue;
604
605                         p = join(streq(*i, "/") ? "" : *i, "/", de->d_name, NULL);
606                         if (!p) {
607                                 r = -ENOMEM;
608                                 goto fail;
609                         }
610
611                         if ((r = set_put(m->unit_path_cache, p)) < 0) {
612                                 free(p);
613                                 goto fail;
614                         }
615                 }
616
617                 closedir(d);
618                 d = NULL;
619         }
620
621         return;
622
623 fail:
624         log_error("Failed to build unit path cache: %s", strerror(-r));
625
626         set_free_free(m->unit_path_cache);
627         m->unit_path_cache = NULL;
628
629         if (d)
630                 closedir(d);
631 }
632
633 int manager_startup(Manager *m, FILE *serialization, FDSet *fds) {
634         int r, q;
635
636         assert(m);
637
638         manager_run_generators(m);
639
640         manager_build_unit_path_cache(m);
641
642         /* If we will deserialize make sure that during enumeration
643          * this is already known, so we increase the counter here
644          * already */
645         if (serialization)
646                 m->n_reloading ++;
647
648         /* First, enumerate what we can from all config files */
649         r = manager_enumerate(m);
650
651         /* Second, deserialize if there is something to deserialize */
652         if (serialization)
653                 if ((q = manager_deserialize(m, serialization, fds)) < 0)
654                         r = q;
655
656         /* Third, fire things up! */
657         if ((q = manager_coldplug(m)) < 0)
658                 r = q;
659
660         if (serialization) {
661                 assert(m->n_reloading > 0);
662                 m->n_reloading --;
663         }
664
665         return r;
666 }
667
668 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, bool override, DBusError *e, Job **_ret) {
669         int r;
670         Transaction *tr;
671
672         assert(m);
673         assert(type < _JOB_TYPE_MAX);
674         assert(unit);
675         assert(mode < _JOB_MODE_MAX);
676
677         if (mode == JOB_ISOLATE && type != JOB_START) {
678                 dbus_set_error(e, BUS_ERROR_INVALID_JOB_MODE, "Isolate is only valid for start.");
679                 return -EINVAL;
680         }
681
682         if (mode == JOB_ISOLATE && !unit->allow_isolate) {
683                 dbus_set_error(e, BUS_ERROR_NO_ISOLATION, "Operation refused, unit may not be isolated.");
684                 return -EPERM;
685         }
686
687         log_debug("Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
688
689         job_type_collapse(&type, unit);
690
691         tr = transaction_new();
692         if (!tr)
693                 return -ENOMEM;
694
695         r = transaction_add_job_and_dependencies(tr, type, unit, NULL, true, override, false,
696                                                  mode == JOB_IGNORE_DEPENDENCIES || mode == JOB_IGNORE_REQUIREMENTS,
697                                                  mode == JOB_IGNORE_DEPENDENCIES, e);
698         if (r < 0)
699                 goto tr_abort;
700
701         if (mode == JOB_ISOLATE) {
702                 r = transaction_add_isolate_jobs(tr, m);
703                 if (r < 0)
704                         goto tr_abort;
705         }
706
707         r = transaction_activate(tr, m, mode, e);
708         if (r < 0)
709                 goto tr_abort;
710
711         log_debug("Enqueued job %s/%s as %u", unit->id, job_type_to_string(type), (unsigned) tr->anchor_job->id);
712
713         if (_ret)
714                 *_ret = tr->anchor_job;
715
716         transaction_free(tr);
717         return 0;
718
719 tr_abort:
720         transaction_abort(tr);
721         transaction_free(tr);
722         return r;
723 }
724
725 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, bool override, DBusError *e, Job **_ret) {
726         Unit *unit;
727         int r;
728
729         assert(m);
730         assert(type < _JOB_TYPE_MAX);
731         assert(name);
732         assert(mode < _JOB_MODE_MAX);
733
734         if ((r = manager_load_unit(m, name, NULL, NULL, &unit)) < 0)
735                 return r;
736
737         return manager_add_job(m, type, unit, mode, override, e, _ret);
738 }
739
740 Job *manager_get_job(Manager *m, uint32_t id) {
741         assert(m);
742
743         return hashmap_get(m->jobs, UINT32_TO_PTR(id));
744 }
745
746 Unit *manager_get_unit(Manager *m, const char *name) {
747         assert(m);
748         assert(name);
749
750         return hashmap_get(m->units, name);
751 }
752
753 unsigned manager_dispatch_load_queue(Manager *m) {
754         Unit *u;
755         unsigned n = 0;
756
757         assert(m);
758
759         /* Make sure we are not run recursively */
760         if (m->dispatching_load_queue)
761                 return 0;
762
763         m->dispatching_load_queue = true;
764
765         /* Dispatches the load queue. Takes a unit from the queue and
766          * tries to load its data until the queue is empty */
767
768         while ((u = m->load_queue)) {
769                 assert(u->in_load_queue);
770
771                 unit_load(u);
772                 n++;
773         }
774
775         m->dispatching_load_queue = false;
776         return n;
777 }
778
779 int manager_load_unit_prepare(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
780         Unit *ret;
781         UnitType t;
782         int r;
783
784         assert(m);
785         assert(name || path);
786
787         /* This will prepare the unit for loading, but not actually
788          * load anything from disk. */
789
790         if (path && !is_path(path)) {
791                 dbus_set_error(e, BUS_ERROR_INVALID_PATH, "Path %s is not absolute.", path);
792                 return -EINVAL;
793         }
794
795         if (!name)
796                 name = path_get_file_name(path);
797
798         t = unit_name_to_type(name);
799
800         if (t == _UNIT_TYPE_INVALID || !unit_name_is_valid_no_type(name, false)) {
801                 dbus_set_error(e, BUS_ERROR_INVALID_NAME, "Unit name %s is not valid.", name);
802                 return -EINVAL;
803         }
804
805         ret = manager_get_unit(m, name);
806         if (ret) {
807                 *_ret = ret;
808                 return 1;
809         }
810
811         ret = unit_new(m, unit_vtable[t]->object_size);
812         if (!ret)
813                 return -ENOMEM;
814
815         if (path) {
816                 ret->fragment_path = strdup(path);
817                 if (!ret->fragment_path) {
818                         unit_free(ret);
819                         return -ENOMEM;
820                 }
821         }
822
823         if ((r = unit_add_name(ret, name)) < 0) {
824                 unit_free(ret);
825                 return r;
826         }
827
828         unit_add_to_load_queue(ret);
829         unit_add_to_dbus_queue(ret);
830         unit_add_to_gc_queue(ret);
831
832         if (_ret)
833                 *_ret = ret;
834
835         return 0;
836 }
837
838 int manager_load_unit(Manager *m, const char *name, const char *path, DBusError *e, Unit **_ret) {
839         int r;
840
841         assert(m);
842
843         /* This will load the service information files, but not actually
844          * start any services or anything. */
845
846         if ((r = manager_load_unit_prepare(m, name, path, e, _ret)) != 0)
847                 return r;
848
849         manager_dispatch_load_queue(m);
850
851         if (_ret)
852                 *_ret = unit_follow_merge(*_ret);
853
854         return 0;
855 }
856
857 void manager_dump_jobs(Manager *s, FILE *f, const char *prefix) {
858         Iterator i;
859         Job *j;
860
861         assert(s);
862         assert(f);
863
864         HASHMAP_FOREACH(j, s->jobs, i)
865                 job_dump(j, f, prefix);
866 }
867
868 void manager_dump_units(Manager *s, FILE *f, const char *prefix) {
869         Iterator i;
870         Unit *u;
871         const char *t;
872
873         assert(s);
874         assert(f);
875
876         HASHMAP_FOREACH_KEY(u, t, s->units, i)
877                 if (u->id == t)
878                         unit_dump(u, f, prefix);
879 }
880
881 void manager_clear_jobs(Manager *m) {
882         Job *j;
883
884         assert(m);
885
886         while ((j = hashmap_first(m->jobs)))
887                 /* No need to recurse. We're cancelling all jobs. */
888                 job_finish_and_invalidate(j, JOB_CANCELED, false);
889 }
890
891 unsigned manager_dispatch_run_queue(Manager *m) {
892         Job *j;
893         unsigned n = 0;
894
895         if (m->dispatching_run_queue)
896                 return 0;
897
898         m->dispatching_run_queue = true;
899
900         while ((j = m->run_queue)) {
901                 assert(j->installed);
902                 assert(j->in_run_queue);
903
904                 job_run_and_invalidate(j);
905                 n++;
906         }
907
908         m->dispatching_run_queue = false;
909         return n;
910 }
911
912 unsigned manager_dispatch_dbus_queue(Manager *m) {
913         Job *j;
914         Unit *u;
915         unsigned n = 0;
916
917         assert(m);
918
919         if (m->dispatching_dbus_queue)
920                 return 0;
921
922         m->dispatching_dbus_queue = true;
923
924         while ((u = m->dbus_unit_queue)) {
925                 assert(u->in_dbus_queue);
926
927                 bus_unit_send_change_signal(u);
928                 n++;
929         }
930
931         while ((j = m->dbus_job_queue)) {
932                 assert(j->in_dbus_queue);
933
934                 bus_job_send_change_signal(j);
935                 n++;
936         }
937
938         m->dispatching_dbus_queue = false;
939         return n;
940 }
941
942 static int manager_process_notify_fd(Manager *m) {
943         ssize_t n;
944
945         assert(m);
946
947         for (;;) {
948                 char buf[4096];
949                 struct msghdr msghdr;
950                 struct iovec iovec;
951                 struct ucred *ucred;
952                 union {
953                         struct cmsghdr cmsghdr;
954                         uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
955                 } control;
956                 Unit *u;
957                 char **tags;
958
959                 zero(iovec);
960                 iovec.iov_base = buf;
961                 iovec.iov_len = sizeof(buf)-1;
962
963                 zero(control);
964                 zero(msghdr);
965                 msghdr.msg_iov = &iovec;
966                 msghdr.msg_iovlen = 1;
967                 msghdr.msg_control = &control;
968                 msghdr.msg_controllen = sizeof(control);
969
970                 if ((n = recvmsg(m->notify_watch.fd, &msghdr, MSG_DONTWAIT)) <= 0) {
971                         if (n >= 0)
972                                 return -EIO;
973
974                         if (errno == EAGAIN || errno == EINTR)
975                                 break;
976
977                         return -errno;
978                 }
979
980                 if (msghdr.msg_controllen < CMSG_LEN(sizeof(struct ucred)) ||
981                     control.cmsghdr.cmsg_level != SOL_SOCKET ||
982                     control.cmsghdr.cmsg_type != SCM_CREDENTIALS ||
983                     control.cmsghdr.cmsg_len != CMSG_LEN(sizeof(struct ucred))) {
984                         log_warning("Received notify message without credentials. Ignoring.");
985                         continue;
986                 }
987
988                 ucred = (struct ucred*) CMSG_DATA(&control.cmsghdr);
989
990                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(ucred->pid))))
991                         if (!(u = cgroup_unit_by_pid(m, ucred->pid))) {
992                                 log_warning("Cannot find unit for notify message of PID %lu.", (unsigned long) ucred->pid);
993                                 continue;
994                         }
995
996                 assert((size_t) n < sizeof(buf));
997                 buf[n] = 0;
998                 if (!(tags = strv_split(buf, "\n\r")))
999                         return -ENOMEM;
1000
1001                 log_debug("Got notification message for unit %s", u->id);
1002
1003                 if (UNIT_VTABLE(u)->notify_message)
1004                         UNIT_VTABLE(u)->notify_message(u, ucred->pid, tags);
1005
1006                 strv_free(tags);
1007         }
1008
1009         return 0;
1010 }
1011
1012 static int manager_dispatch_sigchld(Manager *m) {
1013         assert(m);
1014
1015         for (;;) {
1016                 siginfo_t si;
1017                 Unit *u;
1018                 int r;
1019
1020                 zero(si);
1021
1022                 /* First we call waitd() for a PID and do not reap the
1023                  * zombie. That way we can still access /proc/$PID for
1024                  * it while it is a zombie. */
1025                 if (waitid(P_ALL, 0, &si, WEXITED|WNOHANG|WNOWAIT) < 0) {
1026
1027                         if (errno == ECHILD)
1028                                 break;
1029
1030                         if (errno == EINTR)
1031                                 continue;
1032
1033                         return -errno;
1034                 }
1035
1036                 if (si.si_pid <= 0)
1037                         break;
1038
1039                 if (si.si_code == CLD_EXITED || si.si_code == CLD_KILLED || si.si_code == CLD_DUMPED) {
1040                         char *name = NULL;
1041
1042                         get_process_comm(si.si_pid, &name);
1043                         log_debug("Got SIGCHLD for process %lu (%s)", (unsigned long) si.si_pid, strna(name));
1044                         free(name);
1045                 }
1046
1047                 /* Let's flush any message the dying child might still
1048                  * have queued for us. This ensures that the process
1049                  * still exists in /proc so that we can figure out
1050                  * which cgroup and hence unit it belongs to. */
1051                 if ((r = manager_process_notify_fd(m)) < 0)
1052                         return r;
1053
1054                 /* And now figure out the unit this belongs to */
1055                 if (!(u = hashmap_get(m->watch_pids, LONG_TO_PTR(si.si_pid))))
1056                         u = cgroup_unit_by_pid(m, si.si_pid);
1057
1058                 /* And now, we actually reap the zombie. */
1059                 if (waitid(P_PID, si.si_pid, &si, WEXITED) < 0) {
1060                         if (errno == EINTR)
1061                                 continue;
1062
1063                         return -errno;
1064                 }
1065
1066                 if (si.si_code != CLD_EXITED && si.si_code != CLD_KILLED && si.si_code != CLD_DUMPED)
1067                         continue;
1068
1069                 log_debug("Child %lu died (code=%s, status=%i/%s)",
1070                           (long unsigned) si.si_pid,
1071                           sigchld_code_to_string(si.si_code),
1072                           si.si_status,
1073                           strna(si.si_code == CLD_EXITED
1074                                 ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL)
1075                                 : signal_to_string(si.si_status)));
1076
1077                 if (!u)
1078                         continue;
1079
1080                 log_debug("Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
1081
1082                 hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
1083                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
1084         }
1085
1086         return 0;
1087 }
1088
1089 static int manager_start_target(Manager *m, const char *name, JobMode mode) {
1090         int r;
1091         DBusError error;
1092
1093         dbus_error_init(&error);
1094
1095         log_debug("Activating special unit %s", name);
1096
1097         if ((r = manager_add_job_by_name(m, JOB_START, name, mode, true, &error, NULL)) < 0)
1098                 log_error("Failed to enqueue %s job: %s", name, bus_error(&error, r));
1099
1100         dbus_error_free(&error);
1101
1102         return r;
1103 }
1104
1105 static int manager_process_signal_fd(Manager *m) {
1106         ssize_t n;
1107         struct signalfd_siginfo sfsi;
1108         bool sigchld = false;
1109
1110         assert(m);
1111
1112         for (;;) {
1113                 if ((n = read(m->signal_watch.fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
1114
1115                         if (n >= 0)
1116                                 return -EIO;
1117
1118                         if (errno == EINTR || errno == EAGAIN)
1119                                 break;
1120
1121                         return -errno;
1122                 }
1123
1124                 if (sfsi.ssi_pid > 0) {
1125                         char *p = NULL;
1126
1127                         get_process_comm(sfsi.ssi_pid, &p);
1128
1129                         log_debug("Received SIG%s from PID %lu (%s).",
1130                                   signal_to_string(sfsi.ssi_signo),
1131                                   (unsigned long) sfsi.ssi_pid, strna(p));
1132                         free(p);
1133                 } else
1134                         log_debug("Received SIG%s.", signal_to_string(sfsi.ssi_signo));
1135
1136                 switch (sfsi.ssi_signo) {
1137
1138                 case SIGCHLD:
1139                         sigchld = true;
1140                         break;
1141
1142                 case SIGTERM:
1143                         if (m->running_as == MANAGER_SYSTEM) {
1144                                 /* This is for compatibility with the
1145                                  * original sysvinit */
1146                                 m->exit_code = MANAGER_REEXECUTE;
1147                                 break;
1148                         }
1149
1150                         /* Fall through */
1151
1152                 case SIGINT:
1153                         if (m->running_as == MANAGER_SYSTEM) {
1154                                 manager_start_target(m, SPECIAL_CTRL_ALT_DEL_TARGET, JOB_REPLACE);
1155                                 break;
1156                         }
1157
1158                         /* Run the exit target if there is one, if not, just exit. */
1159                         if (manager_start_target(m, SPECIAL_EXIT_TARGET, JOB_REPLACE) < 0) {
1160                                 m->exit_code = MANAGER_EXIT;
1161                                 return 0;
1162                         }
1163
1164                         break;
1165
1166                 case SIGWINCH:
1167                         if (m->running_as == MANAGER_SYSTEM)
1168                                 manager_start_target(m, SPECIAL_KBREQUEST_TARGET, JOB_REPLACE);
1169
1170                         /* This is a nop on non-init */
1171                         break;
1172
1173                 case SIGPWR:
1174                         if (m->running_as == MANAGER_SYSTEM)
1175                                 manager_start_target(m, SPECIAL_SIGPWR_TARGET, JOB_REPLACE);
1176
1177                         /* This is a nop on non-init */
1178                         break;
1179
1180                 case SIGUSR1: {
1181                         Unit *u;
1182
1183                         u = manager_get_unit(m, SPECIAL_DBUS_SERVICE);
1184
1185                         if (!u || UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) {
1186                                 log_info("Trying to reconnect to bus...");
1187                                 bus_init(m, true);
1188                         }
1189
1190                         if (!u || !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(u))) {
1191                                 log_info("Loading D-Bus service...");
1192                                 manager_start_target(m, SPECIAL_DBUS_SERVICE, JOB_REPLACE);
1193                         }
1194
1195                         break;
1196                 }
1197
1198                 case SIGUSR2: {
1199                         FILE *f;
1200                         char *dump = NULL;
1201                         size_t size;
1202
1203                         if (!(f = open_memstream(&dump, &size))) {
1204                                 log_warning("Failed to allocate memory stream.");
1205                                 break;
1206                         }
1207
1208                         manager_dump_units(m, f, "\t");
1209                         manager_dump_jobs(m, f, "\t");
1210
1211                         if (ferror(f)) {
1212                                 fclose(f);
1213                                 free(dump);
1214                                 log_warning("Failed to write status stream");
1215                                 break;
1216                         }
1217
1218                         fclose(f);
1219                         log_dump(LOG_INFO, dump);
1220                         free(dump);
1221
1222                         break;
1223                 }
1224
1225                 case SIGHUP:
1226                         m->exit_code = MANAGER_RELOAD;
1227                         break;
1228
1229                 default: {
1230
1231                         /* Starting SIGRTMIN+0 */
1232                         static const char * const target_table[] = {
1233                                 [0] = SPECIAL_DEFAULT_TARGET,
1234                                 [1] = SPECIAL_RESCUE_TARGET,
1235                                 [2] = SPECIAL_EMERGENCY_TARGET,
1236                                 [3] = SPECIAL_HALT_TARGET,
1237                                 [4] = SPECIAL_POWEROFF_TARGET,
1238                                 [5] = SPECIAL_REBOOT_TARGET,
1239                                 [6] = SPECIAL_KEXEC_TARGET
1240                         };
1241
1242                         /* Starting SIGRTMIN+13, so that target halt and system halt are 10 apart */
1243                         static const ManagerExitCode code_table[] = {
1244                                 [0] = MANAGER_HALT,
1245                                 [1] = MANAGER_POWEROFF,
1246                                 [2] = MANAGER_REBOOT,
1247                                 [3] = MANAGER_KEXEC
1248                         };
1249
1250                         if ((int) sfsi.ssi_signo >= SIGRTMIN+0 &&
1251                             (int) sfsi.ssi_signo < SIGRTMIN+(int) ELEMENTSOF(target_table)) {
1252                                 int idx = (int) sfsi.ssi_signo - SIGRTMIN;
1253                                 manager_start_target(m, target_table[idx],
1254                                                      (idx == 1 || idx == 2) ? JOB_ISOLATE : JOB_REPLACE);
1255                                 break;
1256                         }
1257
1258                         if ((int) sfsi.ssi_signo >= SIGRTMIN+13 &&
1259                             (int) sfsi.ssi_signo < SIGRTMIN+13+(int) ELEMENTSOF(code_table)) {
1260                                 m->exit_code = code_table[sfsi.ssi_signo - SIGRTMIN - 13];
1261                                 break;
1262                         }
1263
1264                         switch (sfsi.ssi_signo - SIGRTMIN) {
1265
1266                         case 20:
1267                                 log_debug("Enabling showing of status.");
1268                                 manager_set_show_status(m, true);
1269                                 break;
1270
1271                         case 21:
1272                                 log_debug("Disabling showing of status.");
1273                                 manager_set_show_status(m, false);
1274                                 break;
1275
1276                         case 22:
1277                                 log_set_max_level(LOG_DEBUG);
1278                                 log_notice("Setting log level to debug.");
1279                                 break;
1280
1281                         case 23:
1282                                 log_set_max_level(LOG_INFO);
1283                                 log_notice("Setting log level to info.");
1284                                 break;
1285
1286                         case 26:
1287                                 log_set_target(LOG_TARGET_JOURNAL_OR_KMSG);
1288                                 log_notice("Setting log target to journal-or-kmsg.");
1289                                 break;
1290
1291                         case 27:
1292                                 log_set_target(LOG_TARGET_CONSOLE);
1293                                 log_notice("Setting log target to console.");
1294                                 break;
1295
1296                         case 28:
1297                                 log_set_target(LOG_TARGET_KMSG);
1298                                 log_notice("Setting log target to kmsg.");
1299                                 break;
1300
1301                         case 29:
1302                                 log_set_target(LOG_TARGET_SYSLOG_OR_KMSG);
1303                                 log_notice("Setting log target to syslog-or-kmsg.");
1304                                 break;
1305
1306                         default:
1307                                 log_warning("Got unhandled signal <%s>.", signal_to_string(sfsi.ssi_signo));
1308                         }
1309                 }
1310                 }
1311         }
1312
1313         if (sigchld)
1314                 return manager_dispatch_sigchld(m);
1315
1316         return 0;
1317 }
1318
1319 static int process_event(Manager *m, struct epoll_event *ev) {
1320         int r;
1321         Watch *w;
1322
1323         assert(m);
1324         assert(ev);
1325
1326         assert_se(w = ev->data.ptr);
1327
1328         if (w->type == WATCH_INVALID)
1329                 return 0;
1330
1331         switch (w->type) {
1332
1333         case WATCH_SIGNAL:
1334
1335                 /* An incoming signal? */
1336                 if (ev->events != EPOLLIN)
1337                         return -EINVAL;
1338
1339                 if ((r = manager_process_signal_fd(m)) < 0)
1340                         return r;
1341
1342                 break;
1343
1344         case WATCH_NOTIFY:
1345
1346                 /* An incoming daemon notification event? */
1347                 if (ev->events != EPOLLIN)
1348                         return -EINVAL;
1349
1350                 if ((r = manager_process_notify_fd(m)) < 0)
1351                         return r;
1352
1353                 break;
1354
1355         case WATCH_FD:
1356
1357                 /* Some fd event, to be dispatched to the units */
1358                 UNIT_VTABLE(w->data.unit)->fd_event(w->data.unit, w->fd, ev->events, w);
1359                 break;
1360
1361         case WATCH_UNIT_TIMER:
1362         case WATCH_JOB_TIMER: {
1363                 uint64_t v;
1364                 ssize_t k;
1365
1366                 /* Some timer event, to be dispatched to the units */
1367                 if ((k = read(w->fd, &v, sizeof(v))) != sizeof(v)) {
1368
1369                         if (k < 0 && (errno == EINTR || errno == EAGAIN))
1370                                 break;
1371
1372                         return k < 0 ? -errno : -EIO;
1373                 }
1374
1375                 if (w->type == WATCH_UNIT_TIMER)
1376                         UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
1377                 else
1378                         job_timer_event(w->data.job, v, w);
1379                 break;
1380         }
1381
1382         case WATCH_MOUNT:
1383                 /* Some mount table change, intended for the mount subsystem */
1384                 mount_fd_event(m, ev->events);
1385                 break;
1386
1387         case WATCH_SWAP:
1388                 /* Some swap table change, intended for the swap subsystem */
1389                 swap_fd_event(m, ev->events);
1390                 break;
1391
1392         case WATCH_UDEV:
1393                 /* Some notification from udev, intended for the device subsystem */
1394                 device_fd_event(m, ev->events);
1395                 break;
1396
1397         case WATCH_DBUS_WATCH:
1398                 bus_watch_event(m, w, ev->events);
1399                 break;
1400
1401         case WATCH_DBUS_TIMEOUT:
1402                 bus_timeout_event(m, w, ev->events);
1403                 break;
1404
1405         default:
1406                 log_error("event type=%i", w->type);
1407                 assert_not_reached("Unknown epoll event type.");
1408         }
1409
1410         return 0;
1411 }
1412
1413 int manager_loop(Manager *m) {
1414         int r;
1415
1416         RATELIMIT_DEFINE(rl, 1*USEC_PER_SEC, 50000);
1417
1418         assert(m);
1419         m->exit_code = MANAGER_RUNNING;
1420
1421         /* Release the path cache */
1422         set_free_free(m->unit_path_cache);
1423         m->unit_path_cache = NULL;
1424
1425         manager_check_finished(m);
1426
1427         /* There might still be some zombies hanging around from
1428          * before we were exec()'ed. Leat's reap them */
1429         r = manager_dispatch_sigchld(m);
1430         if (r < 0)
1431                 return r;
1432
1433         while (m->exit_code == MANAGER_RUNNING) {
1434                 struct epoll_event event;
1435                 int n;
1436                 int wait_msec = -1;
1437
1438                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM)
1439                         watchdog_ping();
1440
1441                 if (!ratelimit_test(&rl)) {
1442                         /* Yay, something is going seriously wrong, pause a little */
1443                         log_warning("Looping too fast. Throttling execution a little.");
1444                         sleep(1);
1445                         continue;
1446                 }
1447
1448                 if (manager_dispatch_load_queue(m) > 0)
1449                         continue;
1450
1451                 if (manager_dispatch_run_queue(m) > 0)
1452                         continue;
1453
1454                 if (bus_dispatch(m) > 0)
1455                         continue;
1456
1457                 if (manager_dispatch_cleanup_queue(m) > 0)
1458                         continue;
1459
1460                 if (manager_dispatch_gc_queue(m) > 0)
1461                         continue;
1462
1463                 if (manager_dispatch_dbus_queue(m) > 0)
1464                         continue;
1465
1466                 if (swap_dispatch_reload(m) > 0)
1467                         continue;
1468
1469                 /* Sleep for half the watchdog time */
1470                 if (m->runtime_watchdog > 0 && m->running_as == MANAGER_SYSTEM) {
1471                         wait_msec = (int) (m->runtime_watchdog / 2 / USEC_PER_MSEC);
1472                         if (wait_msec <= 0)
1473                                 wait_msec = 1;
1474                 } else
1475                         wait_msec = -1;
1476
1477                 n = epoll_wait(m->epoll_fd, &event, 1, wait_msec);
1478                 if (n < 0) {
1479
1480                         if (errno == EINTR)
1481                                 continue;
1482
1483                         return -errno;
1484                 } else if (n == 0)
1485                         continue;
1486
1487                 assert(n == 1);
1488
1489                 r = process_event(m, &event);
1490                 if (r < 0)
1491                         return r;
1492         }
1493
1494         return m->exit_code;
1495 }
1496
1497 int manager_load_unit_from_dbus_path(Manager *m, const char *s, DBusError *e, Unit **_u) {
1498         char *n;
1499         Unit *u;
1500         int r;
1501
1502         assert(m);
1503         assert(s);
1504         assert(_u);
1505
1506         if (!startswith(s, "/org/freedesktop/systemd1/unit/"))
1507                 return -EINVAL;
1508
1509         n = bus_path_unescape(s+31);
1510         if (!n)
1511                 return -ENOMEM;
1512
1513         r = manager_load_unit(m, n, NULL, e, &u);
1514         free(n);
1515
1516         if (r < 0)
1517                 return r;
1518
1519         *_u = u;
1520
1521         return 0;
1522 }
1523
1524 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j) {
1525         Job *j;
1526         unsigned id;
1527         int r;
1528
1529         assert(m);
1530         assert(s);
1531         assert(_j);
1532
1533         if (!startswith(s, "/org/freedesktop/systemd1/job/"))
1534                 return -EINVAL;
1535
1536         if ((r = safe_atou(s + 30, &id)) < 0)
1537                 return r;
1538
1539         if (!(j = manager_get_job(m, id)))
1540                 return -ENOENT;
1541
1542         *_j = j;
1543
1544         return 0;
1545 }
1546
1547 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
1548
1549 #ifdef HAVE_AUDIT
1550         char *p;
1551
1552         if (m->audit_fd < 0)
1553                 return;
1554
1555         /* Don't generate audit events if the service was already
1556          * started and we're just deserializing */
1557         if (m->n_reloading > 0)
1558                 return;
1559
1560         if (m->running_as != MANAGER_SYSTEM)
1561                 return;
1562
1563         if (u->type != UNIT_SERVICE)
1564                 return;
1565
1566         if (!(p = unit_name_to_prefix_and_instance(u->id))) {
1567                 log_error("Failed to allocate unit name for audit message: %s", strerror(ENOMEM));
1568                 return;
1569         }
1570
1571         if (audit_log_user_comm_message(m->audit_fd, type, "", p, NULL, NULL, NULL, success) < 0) {
1572                 if (errno == EPERM) {
1573                         /* We aren't allowed to send audit messages?
1574                          * Then let's not retry again. */
1575                         audit_close(m->audit_fd);
1576                         m->audit_fd = -1;
1577                 } else
1578                         log_warning("Failed to send audit message: %m");
1579         }
1580
1581         free(p);
1582 #endif
1583
1584 }
1585
1586 void manager_send_unit_plymouth(Manager *m, Unit *u) {
1587         int fd = -1;
1588         union sockaddr_union sa;
1589         int n = 0;
1590         char *message = NULL;
1591
1592         /* Don't generate plymouth events if the service was already
1593          * started and we're just deserializing */
1594         if (m->n_reloading > 0)
1595                 return;
1596
1597         if (m->running_as != MANAGER_SYSTEM)
1598                 return;
1599
1600         if (u->type != UNIT_SERVICE &&
1601             u->type != UNIT_MOUNT &&
1602             u->type != UNIT_SWAP)
1603                 return;
1604
1605         /* We set SOCK_NONBLOCK here so that we rather drop the
1606          * message then wait for plymouth */
1607         if ((fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0)) < 0) {
1608                 log_error("socket() failed: %m");
1609                 return;
1610         }
1611
1612         zero(sa);
1613         sa.sa.sa_family = AF_UNIX;
1614         strncpy(sa.un.sun_path+1, "/org/freedesktop/plymouthd", sizeof(sa.un.sun_path)-1);
1615         if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + 1 + strlen(sa.un.sun_path+1)) < 0) {
1616
1617                 if (errno != EPIPE &&
1618                     errno != EAGAIN &&
1619                     errno != ENOENT &&
1620                     errno != ECONNREFUSED &&
1621                     errno != ECONNRESET &&
1622                     errno != ECONNABORTED)
1623                         log_error("connect() failed: %m");
1624
1625                 goto finish;
1626         }
1627
1628         if (asprintf(&message, "U\002%c%s%n", (int) (strlen(u->id) + 1), u->id, &n) < 0) {
1629                 log_error("Out of memory");
1630                 goto finish;
1631         }
1632
1633         errno = 0;
1634         if (write(fd, message, n + 1) != n + 1) {
1635
1636                 if (errno != EPIPE &&
1637                     errno != EAGAIN &&
1638                     errno != ENOENT &&
1639                     errno != ECONNREFUSED &&
1640                     errno != ECONNRESET &&
1641                     errno != ECONNABORTED)
1642                         log_error("Failed to write Plymouth message: %m");
1643
1644                 goto finish;
1645         }
1646
1647 finish:
1648         if (fd >= 0)
1649                 close_nointr_nofail(fd);
1650
1651         free(message);
1652 }
1653
1654 void manager_dispatch_bus_name_owner_changed(
1655                 Manager *m,
1656                 const char *name,
1657                 const char* old_owner,
1658                 const char *new_owner) {
1659
1660         Unit *u;
1661
1662         assert(m);
1663         assert(name);
1664
1665         if (!(u = hashmap_get(m->watch_bus, name)))
1666                 return;
1667
1668         UNIT_VTABLE(u)->bus_name_owner_change(u, name, old_owner, new_owner);
1669 }
1670
1671 void manager_dispatch_bus_query_pid_done(
1672                 Manager *m,
1673                 const char *name,
1674                 pid_t pid) {
1675
1676         Unit *u;
1677
1678         assert(m);
1679         assert(name);
1680         assert(pid >= 1);
1681
1682         if (!(u = hashmap_get(m->watch_bus, name)))
1683                 return;
1684
1685         UNIT_VTABLE(u)->bus_query_pid_done(u, name, pid);
1686 }
1687
1688 int manager_open_serialization(Manager *m, FILE **_f) {
1689         char *path = NULL;
1690         mode_t saved_umask;
1691         int fd;
1692         FILE *f;
1693
1694         assert(_f);
1695
1696         if (m->running_as == MANAGER_SYSTEM)
1697                 asprintf(&path, "/run/systemd/dump-%lu-XXXXXX", (unsigned long) getpid());
1698         else
1699                 asprintf(&path, "/tmp/systemd-dump-%lu-XXXXXX", (unsigned long) getpid());
1700
1701         if (!path)
1702                 return -ENOMEM;
1703
1704         saved_umask = umask(0077);
1705         fd = mkostemp(path, O_RDWR|O_CLOEXEC);
1706         umask(saved_umask);
1707
1708         if (fd < 0) {
1709                 free(path);
1710                 return -errno;
1711         }
1712
1713         unlink(path);
1714
1715         log_debug("Serializing state to %s", path);
1716         free(path);
1717
1718         if (!(f = fdopen(fd, "w+")))
1719                 return -errno;
1720
1721         *_f = f;
1722
1723         return 0;
1724 }
1725
1726 int manager_serialize(Manager *m, FILE *f, FDSet *fds) {
1727         Iterator i;
1728         Unit *u;
1729         const char *t;
1730         int r;
1731
1732         assert(m);
1733         assert(f);
1734         assert(fds);
1735
1736         m->n_reloading ++;
1737
1738         fprintf(f, "current-job-id=%i\n", m->current_job_id);
1739         fprintf(f, "taint-usr=%s\n", yes_no(m->taint_usr));
1740
1741         dual_timestamp_serialize(f, "initrd-timestamp", &m->initrd_timestamp);
1742
1743         if (! in_initrd()) {
1744                 dual_timestamp_serialize(f, "startup-timestamp", &m->startup_timestamp);
1745                 dual_timestamp_serialize(f, "finish-timestamp", &m->finish_timestamp);
1746         }
1747
1748         fputc('\n', f);
1749
1750         HASHMAP_FOREACH_KEY(u, t, m->units, i) {
1751                 if (u->id != t)
1752                         continue;
1753
1754                 if (!unit_can_serialize(u))
1755                         continue;
1756
1757                 /* Start marker */
1758                 fputs(u->id, f);
1759                 fputc('\n', f);
1760
1761                 if ((r = unit_serialize(u, f, fds)) < 0) {
1762                         m->n_reloading --;
1763                         return r;
1764                 }
1765         }
1766
1767         assert(m->n_reloading > 0);
1768         m->n_reloading --;
1769
1770         if (ferror(f))
1771                 return -EIO;
1772
1773         r = bus_fdset_add_all(m, fds);
1774         if (r < 0)
1775                 return r;
1776
1777         return 0;
1778 }
1779
1780 int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
1781         int r = 0;
1782
1783         assert(m);
1784         assert(f);
1785
1786         log_debug("Deserializing state...");
1787
1788         m->n_reloading ++;
1789
1790         for (;;) {
1791                 char line[LINE_MAX], *l;
1792
1793                 if (!fgets(line, sizeof(line), f)) {
1794                         if (feof(f))
1795                                 r = 0;
1796                         else
1797                                 r = -errno;
1798
1799                         goto finish;
1800                 }
1801
1802                 char_array_0(line);
1803                 l = strstrip(line);
1804
1805                 if (l[0] == 0)
1806                         break;
1807
1808                 if (startswith(l, "current-job-id=")) {
1809                         uint32_t id;
1810
1811                         if (safe_atou32(l+15, &id) < 0)
1812                                 log_debug("Failed to parse current job id value %s", l+15);
1813                         else
1814                                 m->current_job_id = MAX(m->current_job_id, id);
1815                 } else if (startswith(l, "taint-usr=")) {
1816                         int b;
1817
1818                         if ((b = parse_boolean(l+10)) < 0)
1819                                 log_debug("Failed to parse taint /usr flag %s", l+10);
1820                         else
1821                                 m->taint_usr = m->taint_usr || b;
1822                 } else if (startswith(l, "initrd-timestamp="))
1823                         dual_timestamp_deserialize(l+17, &m->initrd_timestamp);
1824                 else if (startswith(l, "startup-timestamp="))
1825                         dual_timestamp_deserialize(l+18, &m->startup_timestamp);
1826                 else if (startswith(l, "finish-timestamp="))
1827                         dual_timestamp_deserialize(l+17, &m->finish_timestamp);
1828                 else
1829                         log_debug("Unknown serialization item '%s'", l);
1830         }
1831
1832         for (;;) {
1833                 Unit *u;
1834                 char name[UNIT_NAME_MAX+2];
1835
1836                 /* Start marker */
1837                 if (!fgets(name, sizeof(name), f)) {
1838                         if (feof(f))
1839                                 r = 0;
1840                         else
1841                                 r = -errno;
1842
1843                         goto finish;
1844                 }
1845
1846                 char_array_0(name);
1847
1848                 if ((r = manager_load_unit(m, strstrip(name), NULL, NULL, &u)) < 0)
1849                         goto finish;
1850
1851                 if ((r = unit_deserialize(u, f, fds)) < 0)
1852                         goto finish;
1853         }
1854
1855 finish:
1856         if (ferror(f)) {
1857                 r = -EIO;
1858                 goto finish;
1859         }
1860
1861         assert(m->n_reloading > 0);
1862         m->n_reloading --;
1863
1864         return r;
1865 }
1866
1867 int manager_reload(Manager *m) {
1868         int r, q;
1869         FILE *f;
1870         FDSet *fds;
1871
1872         assert(m);
1873
1874         if ((r = manager_open_serialization(m, &f)) < 0)
1875                 return r;
1876
1877         m->n_reloading ++;
1878
1879         if (!(fds = fdset_new())) {
1880                 m->n_reloading --;
1881                 r = -ENOMEM;
1882                 goto finish;
1883         }
1884
1885         if ((r = manager_serialize(m, f, fds)) < 0) {
1886                 m->n_reloading --;
1887                 goto finish;
1888         }
1889
1890         if (fseeko(f, 0, SEEK_SET) < 0) {
1891                 m->n_reloading --;
1892                 r = -errno;
1893                 goto finish;
1894         }
1895
1896         /* From here on there is no way back. */
1897         manager_clear_jobs_and_units(m);
1898         manager_undo_generators(m);
1899
1900         /* Find new unit paths */
1901         lookup_paths_free(&m->lookup_paths);
1902         if ((q = lookup_paths_init(&m->lookup_paths, m->running_as, true)) < 0)
1903                 r = q;
1904
1905         manager_run_generators(m);
1906
1907         manager_build_unit_path_cache(m);
1908
1909         /* First, enumerate what we can from all config files */
1910         if ((q = manager_enumerate(m)) < 0)
1911                 r = q;
1912
1913         /* Second, deserialize our stored data */
1914         if ((q = manager_deserialize(m, f, fds)) < 0)
1915                 r = q;
1916
1917         fclose(f);
1918         f = NULL;
1919
1920         /* Third, fire things up! */
1921         if ((q = manager_coldplug(m)) < 0)
1922                 r = q;
1923
1924         assert(m->n_reloading > 0);
1925         m->n_reloading--;
1926
1927 finish:
1928         if (f)
1929                 fclose(f);
1930
1931         if (fds)
1932                 fdset_free(fds);
1933
1934         return r;
1935 }
1936
1937 bool manager_is_booting_or_shutting_down(Manager *m) {
1938         Unit *u;
1939
1940         assert(m);
1941
1942         /* Is the initial job still around? */
1943         if (manager_get_job(m, m->default_unit_job_id))
1944                 return true;
1945
1946         /* Is there a job for the shutdown target? */
1947         u = manager_get_unit(m, SPECIAL_SHUTDOWN_TARGET);
1948         if (u)
1949                 return !!u->job;
1950
1951         return false;
1952 }
1953
1954 void manager_reset_failed(Manager *m) {
1955         Unit *u;
1956         Iterator i;
1957
1958         assert(m);
1959
1960         HASHMAP_FOREACH(u, m->units, i)
1961                 unit_reset_failed(u);
1962 }
1963
1964 bool manager_unit_pending_inactive(Manager *m, const char *name) {
1965         Unit *u;
1966
1967         assert(m);
1968         assert(name);
1969
1970         /* Returns true if the unit is inactive or going down */
1971         if (!(u = manager_get_unit(m, name)))
1972                 return true;
1973
1974         return unit_pending_inactive(u);
1975 }
1976
1977 void manager_check_finished(Manager *m) {
1978         char userspace[FORMAT_TIMESPAN_MAX], initrd[FORMAT_TIMESPAN_MAX], kernel[FORMAT_TIMESPAN_MAX], sum[FORMAT_TIMESPAN_MAX];
1979         usec_t kernel_usec, initrd_usec, userspace_usec, total_usec;
1980
1981         assert(m);
1982
1983         if (hashmap_size(m->jobs) > 0)
1984                 return;
1985
1986         /* Notify Type=idle units that we are done now */
1987         close_pipe(m->idle_pipe);
1988
1989         if (dual_timestamp_is_set(&m->finish_timestamp))
1990                 return;
1991
1992         dual_timestamp_get(&m->finish_timestamp);
1993
1994         if (m->running_as == MANAGER_SYSTEM && detect_container(NULL) <= 0) {
1995
1996                 userspace_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
1997                 total_usec = m->finish_timestamp.monotonic;
1998
1999                 if (dual_timestamp_is_set(&m->initrd_timestamp)) {
2000
2001                         kernel_usec = m->initrd_timestamp.monotonic;
2002                         initrd_usec = m->startup_timestamp.monotonic - m->initrd_timestamp.monotonic;
2003
2004                         log_info("Startup finished in %s (kernel) + %s (initrd) + %s (userspace) = %s.",
2005                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
2006                                  format_timespan(initrd, sizeof(initrd), initrd_usec),
2007                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
2008                                  format_timespan(sum, sizeof(sum), total_usec));
2009                 } else {
2010                         kernel_usec = m->startup_timestamp.monotonic;
2011                         initrd_usec = 0;
2012
2013                         log_info("Startup finished in %s (kernel) + %s (userspace) = %s.",
2014                                  format_timespan(kernel, sizeof(kernel), kernel_usec),
2015                                  format_timespan(userspace, sizeof(userspace), userspace_usec),
2016                                  format_timespan(sum, sizeof(sum), total_usec));
2017                 }
2018         } else {
2019                 userspace_usec = initrd_usec = kernel_usec = 0;
2020                 total_usec = m->finish_timestamp.monotonic - m->startup_timestamp.monotonic;
2021
2022                 log_debug("Startup finished in %s.",
2023                           format_timespan(sum, sizeof(sum), total_usec));
2024         }
2025
2026         bus_broadcast_finished(m, kernel_usec, initrd_usec, userspace_usec, total_usec);
2027
2028         sd_notifyf(false,
2029                    "READY=1\nSTATUS=Startup finished in %s.",
2030                    format_timespan(sum, sizeof(sum), total_usec));
2031 }
2032
2033 void manager_run_generators(Manager *m) {
2034         DIR *d = NULL;
2035         const char *generator_path;
2036         const char *argv[3];
2037         mode_t u;
2038
2039         assert(m);
2040
2041         generator_path = m->running_as == MANAGER_SYSTEM ? SYSTEM_GENERATOR_PATH : USER_GENERATOR_PATH;
2042         if (!(d = opendir(generator_path))) {
2043
2044                 if (errno == ENOENT)
2045                         return;
2046
2047                 log_error("Failed to enumerate generator directory: %m");
2048                 return;
2049         }
2050
2051         if (!m->generator_unit_path) {
2052                 const char *p;
2053                 char user_path[] = "/tmp/systemd-generator-XXXXXX";
2054
2055                 if (m->running_as == MANAGER_SYSTEM && getpid() == 1) {
2056                         p = "/run/systemd/generator";
2057
2058                         if (mkdir_p(p, 0755) < 0) {
2059                                 log_error("Failed to create generator directory: %m");
2060                                 goto finish;
2061                         }
2062
2063                 } else {
2064                         if (!(p = mkdtemp(user_path))) {
2065                                 log_error("Failed to create generator directory: %m");
2066                                 goto finish;
2067                         }
2068                 }
2069
2070                 if (!(m->generator_unit_path = strdup(p))) {
2071                         log_error("Failed to allocate generator unit path.");
2072                         goto finish;
2073                 }
2074         }
2075
2076         argv[0] = NULL; /* Leave this empty, execute_directory() will fill something in */
2077         argv[1] = m->generator_unit_path;
2078         argv[2] = NULL;
2079
2080         u = umask(0022);
2081         execute_directory(generator_path, d, (char**) argv);
2082         umask(u);
2083
2084         if (rmdir(m->generator_unit_path) >= 0) {
2085                 /* Uh? we were able to remove this dir? I guess that
2086                  * means the directory was empty, hence let's shortcut
2087                  * this */
2088
2089                 free(m->generator_unit_path);
2090                 m->generator_unit_path = NULL;
2091                 goto finish;
2092         }
2093
2094         if (!strv_find(m->lookup_paths.unit_path, m->generator_unit_path)) {
2095                 char **l;
2096
2097                 if (!(l = strv_append(m->lookup_paths.unit_path, m->generator_unit_path))) {
2098                         log_error("Failed to add generator directory to unit search path: %m");
2099                         goto finish;
2100                 }
2101
2102                 strv_free(m->lookup_paths.unit_path);
2103                 m->lookup_paths.unit_path = l;
2104
2105                 log_debug("Added generator unit path %s to search path.", m->generator_unit_path);
2106         }
2107
2108 finish:
2109         if (d)
2110                 closedir(d);
2111 }
2112
2113 void manager_undo_generators(Manager *m) {
2114         assert(m);
2115
2116         if (!m->generator_unit_path)
2117                 return;
2118
2119         strv_remove(m->lookup_paths.unit_path, m->generator_unit_path);
2120         rm_rf(m->generator_unit_path, false, true, false);
2121
2122         free(m->generator_unit_path);
2123         m->generator_unit_path = NULL;
2124 }
2125
2126 int manager_set_default_controllers(Manager *m, char **controllers) {
2127         char **l;
2128
2129         assert(m);
2130
2131         l = strv_copy(controllers);
2132         if (!l)
2133                 return -ENOMEM;
2134
2135         strv_free(m->default_controllers);
2136         m->default_controllers = l;
2137
2138         cg_shorten_controllers(m->default_controllers);
2139
2140         return 0;
2141 }
2142
2143 int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit) {
2144         int i;
2145
2146         assert(m);
2147
2148         for (i = 0; i < RLIMIT_NLIMITS; i++) {
2149                 if (default_rlimit[i]) {
2150                         m->rlimit[i] = newdup(struct rlimit, default_rlimit[i], 1);
2151
2152                         if (!m->rlimit[i])
2153                                 return -ENOMEM;
2154                 }
2155         }
2156
2157         return 0;
2158 }
2159
2160
2161 void manager_recheck_journal(Manager *m) {
2162         Unit *u;
2163
2164         assert(m);
2165
2166         if (m->running_as != MANAGER_SYSTEM)
2167                 return;
2168
2169         u = manager_get_unit(m, SPECIAL_JOURNALD_SOCKET);
2170         if (u && SOCKET(u)->state != SOCKET_RUNNING) {
2171                 log_close_journal();
2172                 return;
2173         }
2174
2175         u = manager_get_unit(m, SPECIAL_JOURNALD_SERVICE);
2176         if (u && SERVICE(u)->state != SERVICE_RUNNING) {
2177                 log_close_journal();
2178                 return;
2179         }
2180
2181         /* Hmm, OK, so the socket is fully up and the service is up
2182          * too, then let's make use of the thing. */
2183         log_open();
2184 }
2185
2186 void manager_set_show_status(Manager *m, bool b) {
2187         assert(m);
2188
2189         if (m->running_as != MANAGER_SYSTEM)
2190                 return;
2191
2192         m->show_status = b;
2193
2194         if (b)
2195                 touch("/run/systemd/show-status");
2196         else
2197                 unlink("/run/systemd/show-status");
2198 }
2199
2200 bool manager_get_show_status(Manager *m) {
2201         assert(m);
2202
2203         if (m->running_as != MANAGER_SYSTEM)
2204                 return false;
2205
2206         if (m->show_status)
2207                 return true;
2208
2209         /* If Plymouth is running make sure we show the status, so
2210          * that there's something nice to see when people press Esc */
2211
2212         return plymouth_running();
2213 }
2214
2215 static const char* const manager_running_as_table[_MANAGER_RUNNING_AS_MAX] = {
2216         [MANAGER_SYSTEM] = "system",
2217         [MANAGER_USER] = "user"
2218 };
2219
2220 DEFINE_STRING_TABLE_LOOKUP(manager_running_as, ManagerRunningAs);