chiark / gitweb /
move libsystemd_core.la sources into core/
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 2 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Manager *m, JobType type, Unit *unit) {
37         Job *j;
38
39         assert(m);
40         assert(type < _JOB_TYPE_MAX);
41         assert(unit);
42
43         if (!(j = new0(Job, 1)))
44                 return NULL;
45
46         j->manager = m;
47         j->id = m->current_job_id++;
48         j->type = type;
49         j->unit = unit;
50
51         j->timer_watch.type = WATCH_INVALID;
52
53         /* We don't link it here, that's what job_dependency() is for */
54
55         return j;
56 }
57
58 void job_free(Job *j) {
59         assert(j);
60
61         /* Detach from next 'bigger' objects */
62         if (j->installed) {
63                 bus_job_send_removed_signal(j);
64
65                 if (j->unit->job == j) {
66                         j->unit->job = NULL;
67                         unit_add_to_gc_queue(j->unit);
68                 }
69
70                 hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
71                 j->installed = false;
72         }
73
74         /* Detach from next 'smaller' objects */
75         manager_transaction_unlink_job(j->manager, j, true);
76
77         if (j->in_run_queue)
78                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
79
80         if (j->in_dbus_queue)
81                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
82
83         if (j->timer_watch.type != WATCH_INVALID) {
84                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
85                 assert(j->timer_watch.data.job == j);
86                 assert(j->timer_watch.fd >= 0);
87
88                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
89                 close_nointr_nofail(j->timer_watch.fd);
90         }
91
92         free(j->bus_client);
93         free(j);
94 }
95
96 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
97         JobDependency *l;
98
99         assert(object);
100
101         /* Adds a new job link, which encodes that the 'subject' job
102          * needs the 'object' job in some way. If 'subject' is NULL
103          * this means the 'anchor' job (i.e. the one the user
104          * explicitly asked for) is the requester. */
105
106         if (!(l = new0(JobDependency, 1)))
107                 return NULL;
108
109         l->subject = subject;
110         l->object = object;
111         l->matters = matters;
112         l->conflicts = conflicts;
113
114         if (subject)
115                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
116         else
117                 LIST_PREPEND(JobDependency, subject, object->manager->transaction_anchor, l);
118
119         LIST_PREPEND(JobDependency, object, object->object_list, l);
120
121         return l;
122 }
123
124 void job_dependency_free(JobDependency *l) {
125         assert(l);
126
127         if (l->subject)
128                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
129         else
130                 LIST_REMOVE(JobDependency, subject, l->object->manager->transaction_anchor, l);
131
132         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
133
134         free(l);
135 }
136
137 void job_dump(Job *j, FILE*f, const char *prefix) {
138         assert(j);
139         assert(f);
140
141         if (!prefix)
142                 prefix = "";
143
144         fprintf(f,
145                 "%s-> Job %u:\n"
146                 "%s\tAction: %s -> %s\n"
147                 "%s\tState: %s\n"
148                 "%s\tForced: %s\n",
149                 prefix, j->id,
150                 prefix, j->unit->id, job_type_to_string(j->type),
151                 prefix, job_state_to_string(j->state),
152                 prefix, yes_no(j->override));
153 }
154
155 bool job_is_anchor(Job *j) {
156         JobDependency *l;
157
158         assert(j);
159
160         LIST_FOREACH(object, l, j->object_list)
161                 if (!l->subject)
162                         return true;
163
164         return false;
165 }
166
167 /*
168  * Merging is commutative, so imagine the matrix as symmetric. We store only
169  * its lower triangle to avoid duplication. We don't store the main diagonal,
170  * because A merged with A is simply A.
171  *
172  * Merging is associative! A merged with B merged with C is the same as
173  * A merged with C merged with B.
174  *
175  * Mergeability is transitive! If A can be merged with B and B with C then
176  * A also with C.
177  *
178  * Also, if A merged with B cannot be merged with C, then either A or B cannot
179  * be merged with C either.
180  */
181 static const JobType job_merging_table[] = {
182 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
183 /************************************************************************************************************************************/
184 /*JOB_START          */
185 /*JOB_VERIFY_ACTIVE  */ JOB_START,
186 /*JOB_STOP           */ -1,                  -1,
187 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
188 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
189 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
190 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
191 };
192
193 JobType job_type_lookup_merge(JobType a, JobType b) {
194         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
195         assert(a >= 0 && a < _JOB_TYPE_MAX);
196         assert(b >= 0 && b < _JOB_TYPE_MAX);
197
198         if (a == b)
199                 return a;
200
201         if (a < b) {
202                 JobType tmp = a;
203                 a = b;
204                 b = tmp;
205         }
206
207         return job_merging_table[(a - 1) * a / 2 + b];
208 }
209
210 bool job_type_is_redundant(JobType a, UnitActiveState b) {
211         switch (a) {
212
213         case JOB_START:
214                 return
215                         b == UNIT_ACTIVE ||
216                         b == UNIT_RELOADING;
217
218         case JOB_STOP:
219                 return
220                         b == UNIT_INACTIVE ||
221                         b == UNIT_FAILED;
222
223         case JOB_VERIFY_ACTIVE:
224                 return
225                         b == UNIT_ACTIVE ||
226                         b == UNIT_RELOADING;
227
228         case JOB_RELOAD:
229                 return
230                         b == UNIT_RELOADING;
231
232         case JOB_RELOAD_OR_START:
233                 return
234                         b == UNIT_ACTIVATING ||
235                         b == UNIT_RELOADING;
236
237         case JOB_RESTART:
238                 return
239                         b == UNIT_ACTIVATING;
240
241         case JOB_TRY_RESTART:
242                 return
243                         b == UNIT_ACTIVATING;
244
245         default:
246                 assert_not_reached("Invalid job type");
247         }
248 }
249
250 bool job_is_runnable(Job *j) {
251         Iterator i;
252         Unit *other;
253
254         assert(j);
255         assert(j->installed);
256
257         /* Checks whether there is any job running for the units this
258          * job needs to be running after (in the case of a 'positive'
259          * job type) or before (in the case of a 'negative' job
260          * type. */
261
262         /* First check if there is an override */
263         if (j->ignore_order)
264                 return true;
265
266         if (j->type == JOB_START ||
267             j->type == JOB_VERIFY_ACTIVE ||
268             j->type == JOB_RELOAD ||
269             j->type == JOB_RELOAD_OR_START) {
270
271                 /* Immediate result is that the job is or might be
272                  * started. In this case lets wait for the
273                  * dependencies, regardless whether they are
274                  * starting or stopping something. */
275
276                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
277                         if (other->job)
278                                 return false;
279         }
280
281         /* Also, if something else is being stopped and we should
282          * change state after it, then lets wait. */
283
284         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
285                 if (other->job &&
286                     (other->job->type == JOB_STOP ||
287                      other->job->type == JOB_RESTART ||
288                      other->job->type == JOB_TRY_RESTART))
289                         return false;
290
291         /* This means that for a service a and a service b where b
292          * shall be started after a:
293          *
294          *  start a + start b → 1st step start a, 2nd step start b
295          *  start a + stop b  → 1st step stop b,  2nd step start a
296          *  stop a  + start b → 1st step stop a,  2nd step start b
297          *  stop a  + stop b  → 1st step stop b,  2nd step stop a
298          *
299          *  This has the side effect that restarts are properly
300          *  synchronized too. */
301
302         return true;
303 }
304
305 static void job_change_type(Job *j, JobType newtype) {
306         log_debug("Converting job %s/%s -> %s/%s",
307                   j->unit->id, job_type_to_string(j->type),
308                   j->unit->id, job_type_to_string(newtype));
309
310         j->type = newtype;
311 }
312
313 int job_run_and_invalidate(Job *j) {
314         int r;
315         uint32_t id;
316         Manager *m;
317
318         assert(j);
319         assert(j->installed);
320
321         if (j->in_run_queue) {
322                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
323                 j->in_run_queue = false;
324         }
325
326         if (j->state != JOB_WAITING)
327                 return 0;
328
329         if (!job_is_runnable(j))
330                 return -EAGAIN;
331
332         j->state = JOB_RUNNING;
333         job_add_to_dbus_queue(j);
334
335         /* While we execute this operation the job might go away (for
336          * example: because it is replaced by a new, conflicting
337          * job.) To make sure we don't access a freed job later on we
338          * store the id here, so that we can verify the job is still
339          * valid. */
340         id = j->id;
341         m = j->manager;
342
343         switch (j->type) {
344
345                 case JOB_RELOAD_OR_START:
346                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
347                                 job_change_type(j, JOB_RELOAD);
348                                 r = unit_reload(j->unit);
349                                 break;
350                         }
351                         job_change_type(j, JOB_START);
352                         /* fall through */
353
354                 case JOB_START:
355                         r = unit_start(j->unit);
356
357                         /* If this unit cannot be started, then simply wait */
358                         if (r == -EBADR)
359                                 r = 0;
360                         break;
361
362                 case JOB_VERIFY_ACTIVE: {
363                         UnitActiveState t = unit_active_state(j->unit);
364                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
365                                 r = -EALREADY;
366                         else if (t == UNIT_ACTIVATING)
367                                 r = -EAGAIN;
368                         else
369                                 r = -ENOEXEC;
370                         break;
371                 }
372
373                 case JOB_TRY_RESTART:
374                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
375                                 r = -ENOEXEC;
376                                 break;
377                         }
378                         job_change_type(j, JOB_RESTART);
379                         /* fall through */
380
381                 case JOB_STOP:
382                 case JOB_RESTART:
383                         r = unit_stop(j->unit);
384
385                         /* If this unit cannot stopped, then simply wait. */
386                         if (r == -EBADR)
387                                 r = 0;
388                         break;
389
390                 case JOB_RELOAD:
391                         r = unit_reload(j->unit);
392                         break;
393
394                 default:
395                         assert_not_reached("Unknown job type");
396         }
397
398         if ((j = manager_get_job(m, id))) {
399                 if (r == -EALREADY)
400                         r = job_finish_and_invalidate(j, JOB_DONE);
401                 else if (r == -ENOEXEC)
402                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
403                 else if (r == -EAGAIN)
404                         j->state = JOB_WAITING;
405                 else if (r < 0)
406                         r = job_finish_and_invalidate(j, JOB_FAILED);
407         }
408
409         return r;
410 }
411
412 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
413         assert(u);
414
415         if (t == JOB_START) {
416
417                 switch (result) {
418
419                 case JOB_DONE:
420                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
421                         break;
422
423                 case JOB_FAILED:
424                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
425                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
426                         break;
427
428                 case JOB_DEPENDENCY:
429                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
430                         break;
431
432                 case JOB_TIMEOUT:
433                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
434                         break;
435
436                 default:
437                         ;
438                 }
439
440         } else if (t == JOB_STOP) {
441
442                 switch (result) {
443
444                 case JOB_TIMEOUT:
445                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
446                         break;
447
448                 case JOB_DONE:
449                 case JOB_FAILED:
450                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
451                         break;
452
453                 default:
454                         ;
455                 }
456         }
457 }
458
459 int job_finish_and_invalidate(Job *j, JobResult result) {
460         Unit *u;
461         Unit *other;
462         JobType t;
463         Iterator i;
464         bool recursed = false;
465
466         assert(j);
467         assert(j->installed);
468
469         job_add_to_dbus_queue(j);
470
471         /* Patch restart jobs so that they become normal start jobs */
472         if (result == JOB_DONE && j->type == JOB_RESTART) {
473
474                 job_change_type(j, JOB_START);
475                 j->state = JOB_WAITING;
476
477                 job_add_to_run_queue(j);
478
479                 u = j->unit;
480                 goto finish;
481         }
482
483         j->result = result;
484
485         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
486
487         if (result == JOB_FAILED)
488                 j->manager->n_failed_jobs ++;
489
490         u = j->unit;
491         t = j->type;
492         job_free(j);
493
494         job_print_status_message(u, t, result);
495
496         /* Fail depending jobs on failure */
497         if (result != JOB_DONE) {
498
499                 if (t == JOB_START ||
500                     t == JOB_VERIFY_ACTIVE ||
501                     t == JOB_RELOAD_OR_START) {
502
503                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
504                                 if (other->job &&
505                                     (other->job->type == JOB_START ||
506                                      other->job->type == JOB_VERIFY_ACTIVE ||
507                                      other->job->type == JOB_RELOAD_OR_START)) {
508                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
509                                         recursed = true;
510                                 }
511
512                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
513                                 if (other->job &&
514                                     (other->job->type == JOB_START ||
515                                      other->job->type == JOB_VERIFY_ACTIVE ||
516                                      other->job->type == JOB_RELOAD_OR_START)) {
517                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
518                                         recursed = true;
519                                 }
520
521                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
522                                 if (other->job &&
523                                     !other->job->override &&
524                                     (other->job->type == JOB_START ||
525                                      other->job->type == JOB_VERIFY_ACTIVE ||
526                                      other->job->type == JOB_RELOAD_OR_START)) {
527                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
528                                         recursed = true;
529                                 }
530
531                 } else if (t == JOB_STOP) {
532
533                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
534                                 if (other->job &&
535                                     (other->job->type == JOB_START ||
536                                      other->job->type == JOB_VERIFY_ACTIVE ||
537                                      other->job->type == JOB_RELOAD_OR_START)) {
538                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
539                                         recursed = true;
540                                 }
541                 }
542         }
543
544         /* Trigger OnFailure dependencies that are not generated by
545          * the unit itself. We don't tread JOB_CANCELED as failure in
546          * this context. And JOB_FAILURE is already handled by the
547          * unit itself. */
548         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
549                 log_notice("Job %s/%s failed with result '%s'.",
550                            u->id,
551                            job_type_to_string(t),
552                            job_result_to_string(result));
553
554                 unit_trigger_on_failure(u);
555         }
556
557 finish:
558         /* Try to start the next jobs that can be started */
559         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
560                 if (other->job)
561                         job_add_to_run_queue(other->job);
562         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
563                 if (other->job)
564                         job_add_to_run_queue(other->job);
565
566         manager_check_finished(u->manager);
567
568         return recursed;
569 }
570
571 int job_start_timer(Job *j) {
572         struct itimerspec its;
573         struct epoll_event ev;
574         int fd, r;
575         assert(j);
576
577         if (j->unit->job_timeout <= 0 ||
578             j->timer_watch.type == WATCH_JOB_TIMER)
579                 return 0;
580
581         assert(j->timer_watch.type == WATCH_INVALID);
582
583         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
584                 r = -errno;
585                 goto fail;
586         }
587
588         zero(its);
589         timespec_store(&its.it_value, j->unit->job_timeout);
590
591         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
592                 r = -errno;
593                 goto fail;
594         }
595
596         zero(ev);
597         ev.data.ptr = &j->timer_watch;
598         ev.events = EPOLLIN;
599
600         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
601                 r = -errno;
602                 goto fail;
603         }
604
605         j->timer_watch.type = WATCH_JOB_TIMER;
606         j->timer_watch.fd = fd;
607         j->timer_watch.data.job = j;
608
609         return 0;
610
611 fail:
612         if (fd >= 0)
613                 close_nointr_nofail(fd);
614
615         return r;
616 }
617
618 void job_add_to_run_queue(Job *j) {
619         assert(j);
620         assert(j->installed);
621
622         if (j->in_run_queue)
623                 return;
624
625         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
626         j->in_run_queue = true;
627 }
628
629 void job_add_to_dbus_queue(Job *j) {
630         assert(j);
631         assert(j->installed);
632
633         if (j->in_dbus_queue)
634                 return;
635
636         /* We don't check if anybody is subscribed here, since this
637          * job might just have been created and not yet assigned to a
638          * connection/client. */
639
640         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
641         j->in_dbus_queue = true;
642 }
643
644 char *job_dbus_path(Job *j) {
645         char *p;
646
647         assert(j);
648
649         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
650                 return NULL;
651
652         return p;
653 }
654
655 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
656         assert(j);
657         assert(w == &j->timer_watch);
658
659         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
660         job_finish_and_invalidate(j, JOB_TIMEOUT);
661 }
662
663 static const char* const job_state_table[_JOB_STATE_MAX] = {
664         [JOB_WAITING] = "waiting",
665         [JOB_RUNNING] = "running"
666 };
667
668 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
669
670 static const char* const job_type_table[_JOB_TYPE_MAX] = {
671         [JOB_START] = "start",
672         [JOB_VERIFY_ACTIVE] = "verify-active",
673         [JOB_STOP] = "stop",
674         [JOB_RELOAD] = "reload",
675         [JOB_RELOAD_OR_START] = "reload-or-start",
676         [JOB_RESTART] = "restart",
677         [JOB_TRY_RESTART] = "try-restart",
678 };
679
680 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
681
682 static const char* const job_mode_table[_JOB_MODE_MAX] = {
683         [JOB_FAIL] = "fail",
684         [JOB_REPLACE] = "replace",
685         [JOB_ISOLATE] = "isolate",
686         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
687         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
688 };
689
690 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
691
692 static const char* const job_result_table[_JOB_RESULT_MAX] = {
693         [JOB_DONE] = "done",
694         [JOB_CANCELED] = "canceled",
695         [JOB_TIMEOUT] = "timeout",
696         [JOB_FAILED] = "failed",
697         [JOB_DEPENDENCY] = "dependency",
698         [JOB_SKIPPED] = "skipped"
699 };
700
701 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);