chiark / gitweb /
job: allow job_free() only on already unlinked jobs
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Manager *m, JobType type, Unit *unit) {
37         Job *j;
38
39         assert(m);
40         assert(type < _JOB_TYPE_MAX);
41         assert(unit);
42
43         if (!(j = new0(Job, 1)))
44                 return NULL;
45
46         j->manager = m;
47         j->id = m->current_job_id++;
48         j->type = type;
49         j->unit = unit;
50
51         j->timer_watch.type = WATCH_INVALID;
52
53         /* We don't link it here, that's what job_dependency() is for */
54
55         return j;
56 }
57
58 void job_free(Job *j) {
59         assert(j);
60
61         /* Detach from next 'bigger' objects */
62         if (j->installed) {
63                 bus_job_send_removed_signal(j);
64
65                 if (j->unit->job == j) {
66                         j->unit->job = NULL;
67                         unit_add_to_gc_queue(j->unit);
68                 }
69
70                 hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
71                 j->installed = false;
72         }
73
74         assert(!j->transaction_prev);
75         assert(!j->transaction_next);
76         assert(!j->subject_list);
77         assert(!j->object_list);
78
79         if (j->in_run_queue)
80                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
81
82         if (j->in_dbus_queue)
83                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
84
85         if (j->timer_watch.type != WATCH_INVALID) {
86                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
87                 assert(j->timer_watch.data.job == j);
88                 assert(j->timer_watch.fd >= 0);
89
90                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
91                 close_nointr_nofail(j->timer_watch.fd);
92         }
93
94         free(j->bus_client);
95         free(j);
96 }
97
98 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
99         JobDependency *l;
100
101         assert(object);
102
103         /* Adds a new job link, which encodes that the 'subject' job
104          * needs the 'object' job in some way. If 'subject' is NULL
105          * this means the 'anchor' job (i.e. the one the user
106          * explicitly asked for) is the requester. */
107
108         if (!(l = new0(JobDependency, 1)))
109                 return NULL;
110
111         l->subject = subject;
112         l->object = object;
113         l->matters = matters;
114         l->conflicts = conflicts;
115
116         if (subject)
117                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
118         else
119                 LIST_PREPEND(JobDependency, subject, object->manager->transaction_anchor, l);
120
121         LIST_PREPEND(JobDependency, object, object->object_list, l);
122
123         return l;
124 }
125
126 void job_dependency_free(JobDependency *l) {
127         assert(l);
128
129         if (l->subject)
130                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
131         else
132                 LIST_REMOVE(JobDependency, subject, l->object->manager->transaction_anchor, l);
133
134         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
135
136         free(l);
137 }
138
139 void job_dump(Job *j, FILE*f, const char *prefix) {
140         assert(j);
141         assert(f);
142
143         if (!prefix)
144                 prefix = "";
145
146         fprintf(f,
147                 "%s-> Job %u:\n"
148                 "%s\tAction: %s -> %s\n"
149                 "%s\tState: %s\n"
150                 "%s\tForced: %s\n",
151                 prefix, j->id,
152                 prefix, j->unit->id, job_type_to_string(j->type),
153                 prefix, job_state_to_string(j->state),
154                 prefix, yes_no(j->override));
155 }
156
157 bool job_is_anchor(Job *j) {
158         JobDependency *l;
159
160         assert(j);
161
162         LIST_FOREACH(object, l, j->object_list)
163                 if (!l->subject)
164                         return true;
165
166         return false;
167 }
168
/*
 * Merging is commutative, so imagine the matrix as symmetric. We store only
 * its lower triangle to avoid duplication. We don't store the main diagonal,
 * because A merged with A is simply A.
 *
 * Merging is associative! A merged with B merged with C is the same as
 * A merged with C merged with B.
 *
 * Mergeability is transitive! If A can be merged with B and B with C then
 * A also with C.
 *
 * Also, if A merged with B cannot be merged with C, then either A or B cannot
 * be merged with C either.
 *
 * Layout: the triangle is stored row by row. For two distinct job types
 * a > b the merged type lives at index (a - 1) * a / 2 + b, which is how
 * job_type_lookup_merge() reads it. The row of the first type (JOB_START)
 * is therefore empty, the second row has one entry, and so on.
 *
 * An entry of -1 presumably marks a pair of types that cannot be merged
 * (NOTE(review): confirm that callers compare against the invalid job
 * type value).
 */
static const JobType job_merging_table[] = {
/* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
/************************************************************************************************************************************/
/*JOB_START          */
/*JOB_VERIFY_ACTIVE  */ JOB_START,
/*JOB_STOP           */ -1,                  -1,
/*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
/*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
/*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
/*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
};
194
195 JobType job_type_lookup_merge(JobType a, JobType b) {
196         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
197         assert(a >= 0 && a < _JOB_TYPE_MAX);
198         assert(b >= 0 && b < _JOB_TYPE_MAX);
199
200         if (a == b)
201                 return a;
202
203         if (a < b) {
204                 JobType tmp = a;
205                 a = b;
206                 b = tmp;
207         }
208
209         return job_merging_table[(a - 1) * a / 2 + b];
210 }
211
212 bool job_type_is_redundant(JobType a, UnitActiveState b) {
213         switch (a) {
214
215         case JOB_START:
216                 return
217                         b == UNIT_ACTIVE ||
218                         b == UNIT_RELOADING;
219
220         case JOB_STOP:
221                 return
222                         b == UNIT_INACTIVE ||
223                         b == UNIT_FAILED;
224
225         case JOB_VERIFY_ACTIVE:
226                 return
227                         b == UNIT_ACTIVE ||
228                         b == UNIT_RELOADING;
229
230         case JOB_RELOAD:
231                 return
232                         b == UNIT_RELOADING;
233
234         case JOB_RELOAD_OR_START:
235                 return
236                         b == UNIT_ACTIVATING ||
237                         b == UNIT_RELOADING;
238
239         case JOB_RESTART:
240                 return
241                         b == UNIT_ACTIVATING;
242
243         case JOB_TRY_RESTART:
244                 return
245                         b == UNIT_ACTIVATING;
246
247         default:
248                 assert_not_reached("Invalid job type");
249         }
250 }
251
252 bool job_is_runnable(Job *j) {
253         Iterator i;
254         Unit *other;
255
256         assert(j);
257         assert(j->installed);
258
259         /* Checks whether there is any job running for the units this
260          * job needs to be running after (in the case of a 'positive'
261          * job type) or before (in the case of a 'negative' job
262          * type. */
263
264         /* First check if there is an override */
265         if (j->ignore_order)
266                 return true;
267
268         if (j->type == JOB_START ||
269             j->type == JOB_VERIFY_ACTIVE ||
270             j->type == JOB_RELOAD ||
271             j->type == JOB_RELOAD_OR_START) {
272
273                 /* Immediate result is that the job is or might be
274                  * started. In this case lets wait for the
275                  * dependencies, regardless whether they are
276                  * starting or stopping something. */
277
278                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
279                         if (other->job)
280                                 return false;
281         }
282
283         /* Also, if something else is being stopped and we should
284          * change state after it, then lets wait. */
285
286         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
287                 if (other->job &&
288                     (other->job->type == JOB_STOP ||
289                      other->job->type == JOB_RESTART ||
290                      other->job->type == JOB_TRY_RESTART))
291                         return false;
292
293         /* This means that for a service a and a service b where b
294          * shall be started after a:
295          *
296          *  start a + start b → 1st step start a, 2nd step start b
297          *  start a + stop b  → 1st step stop b,  2nd step start a
298          *  stop a  + start b → 1st step stop a,  2nd step start b
299          *  stop a  + stop b  → 1st step stop b,  2nd step stop a
300          *
301          *  This has the side effect that restarts are properly
302          *  synchronized too. */
303
304         return true;
305 }
306
307 static void job_change_type(Job *j, JobType newtype) {
308         log_debug("Converting job %s/%s -> %s/%s",
309                   j->unit->id, job_type_to_string(j->type),
310                   j->unit->id, job_type_to_string(newtype));
311
312         j->type = newtype;
313 }
314
315 int job_run_and_invalidate(Job *j) {
316         int r;
317         uint32_t id;
318         Manager *m;
319
320         assert(j);
321         assert(j->installed);
322
323         if (j->in_run_queue) {
324                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
325                 j->in_run_queue = false;
326         }
327
328         if (j->state != JOB_WAITING)
329                 return 0;
330
331         if (!job_is_runnable(j))
332                 return -EAGAIN;
333
334         j->state = JOB_RUNNING;
335         job_add_to_dbus_queue(j);
336
337         /* While we execute this operation the job might go away (for
338          * example: because it is replaced by a new, conflicting
339          * job.) To make sure we don't access a freed job later on we
340          * store the id here, so that we can verify the job is still
341          * valid. */
342         id = j->id;
343         m = j->manager;
344
345         switch (j->type) {
346
347                 case JOB_RELOAD_OR_START:
348                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
349                                 job_change_type(j, JOB_RELOAD);
350                                 r = unit_reload(j->unit);
351                                 break;
352                         }
353                         job_change_type(j, JOB_START);
354                         /* fall through */
355
356                 case JOB_START:
357                         r = unit_start(j->unit);
358
359                         /* If this unit cannot be started, then simply wait */
360                         if (r == -EBADR)
361                                 r = 0;
362                         break;
363
364                 case JOB_VERIFY_ACTIVE: {
365                         UnitActiveState t = unit_active_state(j->unit);
366                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
367                                 r = -EALREADY;
368                         else if (t == UNIT_ACTIVATING)
369                                 r = -EAGAIN;
370                         else
371                                 r = -ENOEXEC;
372                         break;
373                 }
374
375                 case JOB_TRY_RESTART:
376                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
377                                 r = -ENOEXEC;
378                                 break;
379                         }
380                         job_change_type(j, JOB_RESTART);
381                         /* fall through */
382
383                 case JOB_STOP:
384                 case JOB_RESTART:
385                         r = unit_stop(j->unit);
386
387                         /* If this unit cannot stopped, then simply wait. */
388                         if (r == -EBADR)
389                                 r = 0;
390                         break;
391
392                 case JOB_RELOAD:
393                         r = unit_reload(j->unit);
394                         break;
395
396                 default:
397                         assert_not_reached("Unknown job type");
398         }
399
400         if ((j = manager_get_job(m, id))) {
401                 if (r == -EALREADY)
402                         r = job_finish_and_invalidate(j, JOB_DONE);
403                 else if (r == -ENOEXEC)
404                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
405                 else if (r == -EAGAIN)
406                         j->state = JOB_WAITING;
407                 else if (r < 0)
408                         r = job_finish_and_invalidate(j, JOB_FAILED);
409         }
410
411         return r;
412 }
413
414 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
415         assert(u);
416
417         if (t == JOB_START) {
418
419                 switch (result) {
420
421                 case JOB_DONE:
422                         if (u->condition_result)
423                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
424                         break;
425
426                 case JOB_FAILED:
427                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
428                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
429                         break;
430
431                 case JOB_DEPENDENCY:
432                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
433                         break;
434
435                 case JOB_TIMEOUT:
436                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
437                         break;
438
439                 default:
440                         ;
441                 }
442
443         } else if (t == JOB_STOP) {
444
445                 switch (result) {
446
447                 case JOB_TIMEOUT:
448                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
449                         break;
450
451                 case JOB_DONE:
452                 case JOB_FAILED:
453                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
454                         break;
455
456                 default:
457                         ;
458                 }
459         }
460 }
461
462 int job_finish_and_invalidate(Job *j, JobResult result) {
463         Unit *u;
464         Unit *other;
465         JobType t;
466         Iterator i;
467         bool recursed = false;
468
469         assert(j);
470         assert(j->installed);
471
472         job_add_to_dbus_queue(j);
473
474         /* Patch restart jobs so that they become normal start jobs */
475         if (result == JOB_DONE && j->type == JOB_RESTART) {
476
477                 job_change_type(j, JOB_START);
478                 j->state = JOB_WAITING;
479
480                 job_add_to_run_queue(j);
481
482                 u = j->unit;
483                 goto finish;
484         }
485
486         j->result = result;
487
488         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
489
490         if (result == JOB_FAILED)
491                 j->manager->n_failed_jobs ++;
492
493         u = j->unit;
494         t = j->type;
495         job_free(j);
496
497         job_print_status_message(u, t, result);
498
499         /* Fail depending jobs on failure */
500         if (result != JOB_DONE) {
501
502                 if (t == JOB_START ||
503                     t == JOB_VERIFY_ACTIVE ||
504                     t == JOB_RELOAD_OR_START) {
505
506                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
507                                 if (other->job &&
508                                     (other->job->type == JOB_START ||
509                                      other->job->type == JOB_VERIFY_ACTIVE ||
510                                      other->job->type == JOB_RELOAD_OR_START)) {
511                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
512                                         recursed = true;
513                                 }
514
515                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
516                                 if (other->job &&
517                                     (other->job->type == JOB_START ||
518                                      other->job->type == JOB_VERIFY_ACTIVE ||
519                                      other->job->type == JOB_RELOAD_OR_START)) {
520                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
521                                         recursed = true;
522                                 }
523
524                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
525                                 if (other->job &&
526                                     !other->job->override &&
527                                     (other->job->type == JOB_START ||
528                                      other->job->type == JOB_VERIFY_ACTIVE ||
529                                      other->job->type == JOB_RELOAD_OR_START)) {
530                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
531                                         recursed = true;
532                                 }
533
534                 } else if (t == JOB_STOP) {
535
536                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
537                                 if (other->job &&
538                                     (other->job->type == JOB_START ||
539                                      other->job->type == JOB_VERIFY_ACTIVE ||
540                                      other->job->type == JOB_RELOAD_OR_START)) {
541                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
542                                         recursed = true;
543                                 }
544                 }
545         }
546
547         /* Trigger OnFailure dependencies that are not generated by
548          * the unit itself. We don't tread JOB_CANCELED as failure in
549          * this context. And JOB_FAILURE is already handled by the
550          * unit itself. */
551         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
552                 log_notice("Job %s/%s failed with result '%s'.",
553                            u->id,
554                            job_type_to_string(t),
555                            job_result_to_string(result));
556
557                 unit_trigger_on_failure(u);
558         }
559
560 finish:
561         /* Try to start the next jobs that can be started */
562         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
563                 if (other->job)
564                         job_add_to_run_queue(other->job);
565         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
566                 if (other->job)
567                         job_add_to_run_queue(other->job);
568
569         manager_check_finished(u->manager);
570
571         return recursed;
572 }
573
574 int job_start_timer(Job *j) {
575         struct itimerspec its;
576         struct epoll_event ev;
577         int fd, r;
578         assert(j);
579
580         if (j->unit->job_timeout <= 0 ||
581             j->timer_watch.type == WATCH_JOB_TIMER)
582                 return 0;
583
584         assert(j->timer_watch.type == WATCH_INVALID);
585
586         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
587                 r = -errno;
588                 goto fail;
589         }
590
591         zero(its);
592         timespec_store(&its.it_value, j->unit->job_timeout);
593
594         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
595                 r = -errno;
596                 goto fail;
597         }
598
599         zero(ev);
600         ev.data.ptr = &j->timer_watch;
601         ev.events = EPOLLIN;
602
603         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
604                 r = -errno;
605                 goto fail;
606         }
607
608         j->timer_watch.type = WATCH_JOB_TIMER;
609         j->timer_watch.fd = fd;
610         j->timer_watch.data.job = j;
611
612         return 0;
613
614 fail:
615         if (fd >= 0)
616                 close_nointr_nofail(fd);
617
618         return r;
619 }
620
621 void job_add_to_run_queue(Job *j) {
622         assert(j);
623         assert(j->installed);
624
625         if (j->in_run_queue)
626                 return;
627
628         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
629         j->in_run_queue = true;
630 }
631
632 void job_add_to_dbus_queue(Job *j) {
633         assert(j);
634         assert(j->installed);
635
636         if (j->in_dbus_queue)
637                 return;
638
639         /* We don't check if anybody is subscribed here, since this
640          * job might just have been created and not yet assigned to a
641          * connection/client. */
642
643         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
644         j->in_dbus_queue = true;
645 }
646
647 char *job_dbus_path(Job *j) {
648         char *p;
649
650         assert(j);
651
652         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
653                 return NULL;
654
655         return p;
656 }
657
658 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
659         assert(j);
660         assert(w == &j->timer_watch);
661
662         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
663         job_finish_and_invalidate(j, JOB_TIMEOUT);
664 }
665
666 static const char* const job_state_table[_JOB_STATE_MAX] = {
667         [JOB_WAITING] = "waiting",
668         [JOB_RUNNING] = "running"
669 };
670
671 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
672
673 static const char* const job_type_table[_JOB_TYPE_MAX] = {
674         [JOB_START] = "start",
675         [JOB_VERIFY_ACTIVE] = "verify-active",
676         [JOB_STOP] = "stop",
677         [JOB_RELOAD] = "reload",
678         [JOB_RELOAD_OR_START] = "reload-or-start",
679         [JOB_RESTART] = "restart",
680         [JOB_TRY_RESTART] = "try-restart",
681 };
682
683 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
684
685 static const char* const job_mode_table[_JOB_MODE_MAX] = {
686         [JOB_FAIL] = "fail",
687         [JOB_REPLACE] = "replace",
688         [JOB_ISOLATE] = "isolate",
689         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
690         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
691 };
692
693 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
694
695 static const char* const job_result_table[_JOB_RESULT_MAX] = {
696         [JOB_DONE] = "done",
697         [JOB_CANCELED] = "canceled",
698         [JOB_TIMEOUT] = "timeout",
699         [JOB_FAILED] = "failed",
700         [JOB_DEPENDENCY] = "dependency",
701         [JOB_SKIPPED] = "skipped"
702 };
703
704 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);