chiark / gitweb /
781f83e175cf05900b405f59502b699bdb42cd51
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Manager *m, JobType type, Unit *unit) {
37         Job *j;
38
39         assert(m);
40         assert(type < _JOB_TYPE_MAX);
41         assert(unit);
42
43         if (!(j = new0(Job, 1)))
44                 return NULL;
45
46         j->manager = m;
47         j->id = m->current_job_id++;
48         j->type = type;
49         j->unit = unit;
50
51         j->timer_watch.type = WATCH_INVALID;
52
53         /* We don't link it here, that's what job_dependency() is for */
54
55         return j;
56 }
57
58 void job_free(Job *j) {
59         assert(j);
60
61         /* Detach from next 'bigger' objects */
62         if (j->installed) {
63                 bus_job_send_removed_signal(j);
64
65                 if (j->unit->job == j) {
66                         j->unit->job = NULL;
67                         unit_add_to_gc_queue(j->unit);
68                 }
69
70                 hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
71                 j->installed = false;
72         }
73
74         /* Detach from next 'smaller' objects */
75         manager_transaction_unlink_job(j->manager, j, true);
76
77         if (j->in_run_queue)
78                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
79
80         if (j->in_dbus_queue)
81                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
82
83         if (j->timer_watch.type != WATCH_INVALID) {
84                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
85                 assert(j->timer_watch.data.job == j);
86                 assert(j->timer_watch.fd >= 0);
87
88                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
89                 close_nointr_nofail(j->timer_watch.fd);
90         }
91
92         free(j->bus_client);
93         free(j);
94 }
95
96 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
97         JobDependency *l;
98
99         assert(object);
100
101         /* Adds a new job link, which encodes that the 'subject' job
102          * needs the 'object' job in some way. If 'subject' is NULL
103          * this means the 'anchor' job (i.e. the one the user
104          * explicitly asked for) is the requester. */
105
106         if (!(l = new0(JobDependency, 1)))
107                 return NULL;
108
109         l->subject = subject;
110         l->object = object;
111         l->matters = matters;
112         l->conflicts = conflicts;
113
114         if (subject)
115                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
116         else
117                 LIST_PREPEND(JobDependency, subject, object->manager->transaction_anchor, l);
118
119         LIST_PREPEND(JobDependency, object, object->object_list, l);
120
121         return l;
122 }
123
124 void job_dependency_free(JobDependency *l) {
125         assert(l);
126
127         if (l->subject)
128                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
129         else
130                 LIST_REMOVE(JobDependency, subject, l->object->manager->transaction_anchor, l);
131
132         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
133
134         free(l);
135 }
136
137 void job_dump(Job *j, FILE*f, const char *prefix) {
138         assert(j);
139         assert(f);
140
141         if (!prefix)
142                 prefix = "";
143
144         fprintf(f,
145                 "%s-> Job %u:\n"
146                 "%s\tAction: %s -> %s\n"
147                 "%s\tState: %s\n"
148                 "%s\tForced: %s\n",
149                 prefix, j->id,
150                 prefix, j->unit->id, job_type_to_string(j->type),
151                 prefix, job_state_to_string(j->state),
152                 prefix, yes_no(j->override));
153 }
154
155 bool job_is_anchor(Job *j) {
156         JobDependency *l;
157
158         assert(j);
159
160         LIST_FOREACH(object, l, j->object_list)
161                 if (!l->subject)
162                         return true;
163
164         return false;
165 }
166
167 /*
168  * Merging is commutative, so imagine the matrix as symmetric. We store only
169  * its lower triangle to avoid duplication. We don't store the main diagonal,
170  * because A merged with A is simply A.
171  *
172  * Merging is associative! A merged with B merged with C is the same as
173  * A merged with C merged with B.
174  *
175  * Mergeability is transitive! If A can be merged with B and B with C then
176  * A also with C.
177  *
178  * Also, if A merged with B cannot be merged with C, then either A or B cannot
179  * be merged with C either.
180  */
181 static const JobType job_merging_table[] = {
182 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
183 /************************************************************************************************************************************/
184 /*JOB_START          */
185 /*JOB_VERIFY_ACTIVE  */ JOB_START,
186 /*JOB_STOP           */ -1,                  -1,
187 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
188 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
189 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
190 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
191 };
192
193 JobType job_type_lookup_merge(JobType a, JobType b) {
194         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
195         assert(a >= 0 && a < _JOB_TYPE_MAX);
196         assert(b >= 0 && b < _JOB_TYPE_MAX);
197
198         if (a == b)
199                 return a;
200
201         if (a < b) {
202                 JobType tmp = a;
203                 a = b;
204                 b = tmp;
205         }
206
207         return job_merging_table[(a - 1) * a / 2 + b];
208 }
209
210 bool job_type_is_redundant(JobType a, UnitActiveState b) {
211         switch (a) {
212
213         case JOB_START:
214                 return
215                         b == UNIT_ACTIVE ||
216                         b == UNIT_RELOADING;
217
218         case JOB_STOP:
219                 return
220                         b == UNIT_INACTIVE ||
221                         b == UNIT_FAILED;
222
223         case JOB_VERIFY_ACTIVE:
224                 return
225                         b == UNIT_ACTIVE ||
226                         b == UNIT_RELOADING;
227
228         case JOB_RELOAD:
229                 return
230                         b == UNIT_RELOADING;
231
232         case JOB_RELOAD_OR_START:
233                 return
234                         b == UNIT_ACTIVATING ||
235                         b == UNIT_RELOADING;
236
237         case JOB_RESTART:
238                 return
239                         b == UNIT_ACTIVATING;
240
241         case JOB_TRY_RESTART:
242                 return
243                         b == UNIT_ACTIVATING;
244
245         default:
246                 assert_not_reached("Invalid job type");
247         }
248 }
249
250 bool job_is_runnable(Job *j) {
251         Iterator i;
252         Unit *other;
253
254         assert(j);
255         assert(j->installed);
256
257         /* Checks whether there is any job running for the units this
258          * job needs to be running after (in the case of a 'positive'
259          * job type) or before (in the case of a 'negative' job
260          * type. */
261
262         /* First check if there is an override */
263         if (j->ignore_order)
264                 return true;
265
266         if (j->type == JOB_START ||
267             j->type == JOB_VERIFY_ACTIVE ||
268             j->type == JOB_RELOAD ||
269             j->type == JOB_RELOAD_OR_START) {
270
271                 /* Immediate result is that the job is or might be
272                  * started. In this case lets wait for the
273                  * dependencies, regardless whether they are
274                  * starting or stopping something. */
275
276                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
277                         if (other->job)
278                                 return false;
279         }
280
281         /* Also, if something else is being stopped and we should
282          * change state after it, then lets wait. */
283
284         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
285                 if (other->job &&
286                     (other->job->type == JOB_STOP ||
287                      other->job->type == JOB_RESTART ||
288                      other->job->type == JOB_TRY_RESTART))
289                         return false;
290
291         /* This means that for a service a and a service b where b
292          * shall be started after a:
293          *
294          *  start a + start b → 1st step start a, 2nd step start b
295          *  start a + stop b  → 1st step stop b,  2nd step start a
296          *  stop a  + start b → 1st step stop a,  2nd step start b
297          *  stop a  + stop b  → 1st step stop b,  2nd step stop a
298          *
299          *  This has the side effect that restarts are properly
300          *  synchronized too. */
301
302         return true;
303 }
304
305 static void job_change_type(Job *j, JobType newtype) {
306         log_debug("Converting job %s/%s -> %s/%s",
307                   j->unit->id, job_type_to_string(j->type),
308                   j->unit->id, job_type_to_string(newtype));
309
310         j->type = newtype;
311 }
312
313 int job_run_and_invalidate(Job *j) {
314         int r;
315         uint32_t id;
316         Manager *m;
317
318         assert(j);
319         assert(j->installed);
320
321         if (j->in_run_queue) {
322                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
323                 j->in_run_queue = false;
324         }
325
326         if (j->state != JOB_WAITING)
327                 return 0;
328
329         if (!job_is_runnable(j))
330                 return -EAGAIN;
331
332         j->state = JOB_RUNNING;
333         job_add_to_dbus_queue(j);
334
335         /* While we execute this operation the job might go away (for
336          * example: because it is replaced by a new, conflicting
337          * job.) To make sure we don't access a freed job later on we
338          * store the id here, so that we can verify the job is still
339          * valid. */
340         id = j->id;
341         m = j->manager;
342
343         switch (j->type) {
344
345                 case JOB_RELOAD_OR_START:
346                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
347                                 job_change_type(j, JOB_RELOAD);
348                                 r = unit_reload(j->unit);
349                                 break;
350                         }
351                         job_change_type(j, JOB_START);
352                         /* fall through */
353
354                 case JOB_START:
355                         r = unit_start(j->unit);
356
357                         /* If this unit cannot be started, then simply wait */
358                         if (r == -EBADR)
359                                 r = 0;
360                         break;
361
362                 case JOB_VERIFY_ACTIVE: {
363                         UnitActiveState t = unit_active_state(j->unit);
364                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
365                                 r = -EALREADY;
366                         else if (t == UNIT_ACTIVATING)
367                                 r = -EAGAIN;
368                         else
369                                 r = -ENOEXEC;
370                         break;
371                 }
372
373                 case JOB_TRY_RESTART:
374                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
375                                 r = -ENOEXEC;
376                                 break;
377                         }
378                         job_change_type(j, JOB_RESTART);
379                         /* fall through */
380
381                 case JOB_STOP:
382                 case JOB_RESTART:
383                         r = unit_stop(j->unit);
384
385                         /* If this unit cannot stopped, then simply wait. */
386                         if (r == -EBADR)
387                                 r = 0;
388                         break;
389
390                 case JOB_RELOAD:
391                         r = unit_reload(j->unit);
392                         break;
393
394                 default:
395                         assert_not_reached("Unknown job type");
396         }
397
398         if ((j = manager_get_job(m, id))) {
399                 if (r == -EALREADY)
400                         r = job_finish_and_invalidate(j, JOB_DONE);
401                 else if (r == -ENOEXEC)
402                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
403                 else if (r == -EAGAIN)
404                         j->state = JOB_WAITING;
405                 else if (r < 0)
406                         r = job_finish_and_invalidate(j, JOB_FAILED);
407         }
408
409         return r;
410 }
411
412 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
413         assert(u);
414
415         if (t == JOB_START) {
416
417                 switch (result) {
418
419                 case JOB_DONE:
420                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
421                         break;
422
423                 case JOB_FAILED:
424                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
425                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
426                         break;
427
428                 case JOB_DEPENDENCY:
429                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
430                         break;
431
432                 case JOB_TIMEOUT:
433                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
434                         break;
435
436                 default:
437                         ;
438                 }
439
440         } else if (t == JOB_STOP) {
441
442                 switch (result) {
443
444                 case JOB_TIMEOUT:
445                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
446                         break;
447
448                 case JOB_DONE:
449                 case JOB_FAILED:
450                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
451                         break;
452
453                 default:
454                         ;
455                 }
456         }
457 }
458
459 int job_finish_and_invalidate(Job *j, JobResult result) {
460         Unit *u;
461         Unit *other;
462         JobType t;
463         Iterator i;
464         bool recursed = false;
465
466         assert(j);
467         assert(j->installed);
468
469         job_add_to_dbus_queue(j);
470
471         /* Patch restart jobs so that they become normal start jobs */
472         if (result == JOB_DONE && j->type == JOB_RESTART) {
473
474                 job_change_type(j, JOB_START);
475                 j->state = JOB_WAITING;
476
477                 job_add_to_run_queue(j);
478
479                 u = j->unit;
480                 goto finish;
481         }
482
483         j->result = result;
484
485         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
486
487         if (result == JOB_FAILED)
488                 j->manager->n_failed_jobs ++;
489
490         u = j->unit;
491         t = j->type;
492         job_free(j);
493
494         job_print_status_message(u, t, result);
495
496         /* Fail depending jobs on failure */
497         if (result != JOB_DONE) {
498
499                 if (t == JOB_START ||
500                     t == JOB_VERIFY_ACTIVE ||
501                     t == JOB_RELOAD_OR_START) {
502
503                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
504                                 if (other->job &&
505                                     (other->job->type == JOB_START ||
506                                      other->job->type == JOB_VERIFY_ACTIVE ||
507                                      other->job->type == JOB_RELOAD_OR_START)) {
508                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
509                                         recursed = true;
510                                 }
511
512                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
513                                 if (other->job &&
514                                     (other->job->type == JOB_START ||
515                                      other->job->type == JOB_VERIFY_ACTIVE ||
516                                      other->job->type == JOB_RELOAD_OR_START)) {
517                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
518                                         recursed = true;
519                                 }
520
521                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
522                                 if (other->job &&
523                                     !other->job->override &&
524                                     (other->job->type == JOB_START ||
525                                      other->job->type == JOB_VERIFY_ACTIVE ||
526                                      other->job->type == JOB_RELOAD_OR_START)) {
527                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
528                                         recursed = true;
529                                 }
530
531                 } else if (t == JOB_STOP) {
532
533                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
534                                 if (other->job &&
535                                     (other->job->type == JOB_START ||
536                                      other->job->type == JOB_VERIFY_ACTIVE ||
537                                      other->job->type == JOB_RELOAD_OR_START)) {
538                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
539                                         recursed = true;
540                                 }
541                 }
542         }
543
544         /* Trigger OnFailure dependencies that are not generated by
545          * the unit itself. We don't tread JOB_CANCELED as failure in
546          * this context. And JOB_FAILURE is already handled by the
547          * unit itself. */
548         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
549                 log_notice("Job %s/%s failed with result '%s'.",
550                            u->id,
551                            job_type_to_string(t),
552                            job_result_to_string(result));
553
554                 unit_trigger_on_failure(u);
555         }
556
557 finish:
558         /* Try to start the next jobs that can be started */
559         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
560                 if (other->job)
561                         job_add_to_run_queue(other->job);
562         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
563                 if (other->job)
564                         job_add_to_run_queue(other->job);
565
566         manager_check_finished(u->manager);
567
568         return recursed;
569 }
570
571 int job_start_timer(Job *j) {
572         struct itimerspec its;
573         struct epoll_event ev;
574         int fd, r;
575         assert(j);
576
577         if (j->unit->job_timeout <= 0 ||
578             j->timer_watch.type == WATCH_JOB_TIMER)
579                 return 0;
580
581         assert(j->timer_watch.type == WATCH_INVALID);
582
583         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
584                 r = -errno;
585                 goto fail;
586         }
587
588         zero(its);
589         timespec_store(&its.it_value, j->unit->job_timeout);
590
591         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
592                 r = -errno;
593                 goto fail;
594         }
595
596         zero(ev);
597         ev.data.ptr = &j->timer_watch;
598         ev.events = EPOLLIN;
599
600         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
601                 r = -errno;
602                 goto fail;
603         }
604
605         j->timer_watch.type = WATCH_JOB_TIMER;
606         j->timer_watch.fd = fd;
607         j->timer_watch.data.job = j;
608
609         return 0;
610
611 fail:
612         if (fd >= 0)
613                 close_nointr_nofail(fd);
614
615         return r;
616 }
617
618 void job_add_to_run_queue(Job *j) {
619         assert(j);
620         assert(j->installed);
621
622         if (j->in_run_queue)
623                 return;
624
625         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
626         j->in_run_queue = true;
627 }
628
629 void job_add_to_dbus_queue(Job *j) {
630         assert(j);
631         assert(j->installed);
632
633         if (j->in_dbus_queue)
634                 return;
635
636         /* We don't check if anybody is subscribed here, since this
637          * job might just have been created and not yet assigned to a
638          * connection/client. */
639
640         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
641         j->in_dbus_queue = true;
642 }
643
644 char *job_dbus_path(Job *j) {
645         char *p;
646
647         assert(j);
648
649         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
650                 return NULL;
651
652         return p;
653 }
654
655 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
656         assert(j);
657         assert(w == &j->timer_watch);
658
659         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
660         job_finish_and_invalidate(j, JOB_TIMEOUT);
661 }
662
663 static const char* const job_state_table[_JOB_STATE_MAX] = {
664         [JOB_WAITING] = "waiting",
665         [JOB_RUNNING] = "running"
666 };
667
668 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
669
670 static const char* const job_type_table[_JOB_TYPE_MAX] = {
671         [JOB_START] = "start",
672         [JOB_VERIFY_ACTIVE] = "verify-active",
673         [JOB_STOP] = "stop",
674         [JOB_RELOAD] = "reload",
675         [JOB_RELOAD_OR_START] = "reload-or-start",
676         [JOB_RESTART] = "restart",
677         [JOB_TRY_RESTART] = "try-restart",
678 };
679
680 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
681
682 static const char* const job_mode_table[_JOB_MODE_MAX] = {
683         [JOB_FAIL] = "fail",
684         [JOB_REPLACE] = "replace",
685         [JOB_ISOLATE] = "isolate",
686         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
687         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
688 };
689
690 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
691
692 static const char* const job_result_table[_JOB_RESULT_MAX] = {
693         [JOB_DONE] = "done",
694         [JOB_CANCELED] = "canceled",
695         [JOB_TIMEOUT] = "timeout",
696         [JOB_FAILED] = "failed",
697         [JOB_DEPENDENCY] = "dependency",
698         [JOB_SKIPPED] = "skipped"
699 };
700
701 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);