chiark / gitweb /
manager: Transaction as an object
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Manager *m, JobType type, Unit *unit) {
37         Job *j;
38
39         assert(m);
40         assert(type < _JOB_TYPE_MAX);
41         assert(unit);
42
43         if (!(j = new0(Job, 1)))
44                 return NULL;
45
46         j->manager = m;
47         j->id = m->current_job_id++;
48         j->type = type;
49         j->unit = unit;
50
51         j->timer_watch.type = WATCH_INVALID;
52
53         /* We don't link it here, that's what job_dependency() is for */
54
55         return j;
56 }
57
58 void job_uninstall(Job *j) {
59         assert(j->installed);
60         /* Detach from next 'bigger' objects */
61
62         bus_job_send_removed_signal(j);
63
64         if (j->unit->job == j) {
65                 j->unit->job = NULL;
66                 unit_add_to_gc_queue(j->unit);
67         }
68
69         hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
70         j->installed = false;
71 }
72
73 void job_free(Job *j) {
74         assert(j);
75         assert(!j->installed);
76         assert(!j->transaction_prev);
77         assert(!j->transaction_next);
78         assert(!j->subject_list);
79         assert(!j->object_list);
80
81         if (j->in_run_queue)
82                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
83
84         if (j->in_dbus_queue)
85                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
86
87         if (j->timer_watch.type != WATCH_INVALID) {
88                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
89                 assert(j->timer_watch.data.job == j);
90                 assert(j->timer_watch.fd >= 0);
91
92                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
93                 close_nointr_nofail(j->timer_watch.fd);
94         }
95
96         free(j->bus_client);
97         free(j);
98 }
99
100 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts, Transaction *tr) {
101         JobDependency *l;
102
103         assert(object);
104
105         /* Adds a new job link, which encodes that the 'subject' job
106          * needs the 'object' job in some way. If 'subject' is NULL
107          * this means the 'anchor' job (i.e. the one the user
108          * explicitly asked for) is the requester. */
109
110         if (!(l = new0(JobDependency, 1)))
111                 return NULL;
112
113         l->subject = subject;
114         l->object = object;
115         l->matters = matters;
116         l->conflicts = conflicts;
117
118         if (subject)
119                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
120         else
121                 LIST_PREPEND(JobDependency, subject, tr->anchor, l);
122
123         LIST_PREPEND(JobDependency, object, object->object_list, l);
124
125         return l;
126 }
127
128 void job_dependency_free(JobDependency *l, Transaction *tr) {
129         assert(l);
130
131         if (l->subject)
132                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
133         else
134                 LIST_REMOVE(JobDependency, subject, tr->anchor, l);
135
136         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
137
138         free(l);
139 }
140
141 void job_dump(Job *j, FILE*f, const char *prefix) {
142         assert(j);
143         assert(f);
144
145         if (!prefix)
146                 prefix = "";
147
148         fprintf(f,
149                 "%s-> Job %u:\n"
150                 "%s\tAction: %s -> %s\n"
151                 "%s\tState: %s\n"
152                 "%s\tForced: %s\n",
153                 prefix, j->id,
154                 prefix, j->unit->id, job_type_to_string(j->type),
155                 prefix, job_state_to_string(j->state),
156                 prefix, yes_no(j->override));
157 }
158
159 bool job_is_anchor(Job *j) {
160         JobDependency *l;
161
162         assert(j);
163
164         LIST_FOREACH(object, l, j->object_list)
165                 if (!l->subject)
166                         return true;
167
168         return false;
169 }
170
171 /*
172  * Merging is commutative, so imagine the matrix as symmetric. We store only
173  * its lower triangle to avoid duplication. We don't store the main diagonal,
174  * because A merged with A is simply A.
175  *
176  * Merging is associative! A merged with B merged with C is the same as
177  * A merged with C merged with B.
178  *
179  * Mergeability is transitive! If A can be merged with B and B with C then
180  * A also with C.
181  *
182  * Also, if A merged with B cannot be merged with C, then either A or B cannot
183  * be merged with C either.
184  */
185 static const JobType job_merging_table[] = {
186 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
187 /************************************************************************************************************************************/
188 /*JOB_START          */
189 /*JOB_VERIFY_ACTIVE  */ JOB_START,
190 /*JOB_STOP           */ -1,                  -1,
191 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
192 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
193 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
194 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
195 };
196
197 JobType job_type_lookup_merge(JobType a, JobType b) {
198         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
199         assert(a >= 0 && a < _JOB_TYPE_MAX);
200         assert(b >= 0 && b < _JOB_TYPE_MAX);
201
202         if (a == b)
203                 return a;
204
205         if (a < b) {
206                 JobType tmp = a;
207                 a = b;
208                 b = tmp;
209         }
210
211         return job_merging_table[(a - 1) * a / 2 + b];
212 }
213
214 bool job_type_is_redundant(JobType a, UnitActiveState b) {
215         switch (a) {
216
217         case JOB_START:
218                 return
219                         b == UNIT_ACTIVE ||
220                         b == UNIT_RELOADING;
221
222         case JOB_STOP:
223                 return
224                         b == UNIT_INACTIVE ||
225                         b == UNIT_FAILED;
226
227         case JOB_VERIFY_ACTIVE:
228                 return
229                         b == UNIT_ACTIVE ||
230                         b == UNIT_RELOADING;
231
232         case JOB_RELOAD:
233                 return
234                         b == UNIT_RELOADING;
235
236         case JOB_RELOAD_OR_START:
237                 return
238                         b == UNIT_ACTIVATING ||
239                         b == UNIT_RELOADING;
240
241         case JOB_RESTART:
242                 return
243                         b == UNIT_ACTIVATING;
244
245         case JOB_TRY_RESTART:
246                 return
247                         b == UNIT_ACTIVATING;
248
249         default:
250                 assert_not_reached("Invalid job type");
251         }
252 }
253
254 bool job_is_runnable(Job *j) {
255         Iterator i;
256         Unit *other;
257
258         assert(j);
259         assert(j->installed);
260
261         /* Checks whether there is any job running for the units this
262          * job needs to be running after (in the case of a 'positive'
263          * job type) or before (in the case of a 'negative' job
264          * type. */
265
266         /* First check if there is an override */
267         if (j->ignore_order)
268                 return true;
269
270         if (j->type == JOB_START ||
271             j->type == JOB_VERIFY_ACTIVE ||
272             j->type == JOB_RELOAD ||
273             j->type == JOB_RELOAD_OR_START) {
274
275                 /* Immediate result is that the job is or might be
276                  * started. In this case lets wait for the
277                  * dependencies, regardless whether they are
278                  * starting or stopping something. */
279
280                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
281                         if (other->job)
282                                 return false;
283         }
284
285         /* Also, if something else is being stopped and we should
286          * change state after it, then lets wait. */
287
288         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
289                 if (other->job &&
290                     (other->job->type == JOB_STOP ||
291                      other->job->type == JOB_RESTART ||
292                      other->job->type == JOB_TRY_RESTART))
293                         return false;
294
295         /* This means that for a service a and a service b where b
296          * shall be started after a:
297          *
298          *  start a + start b → 1st step start a, 2nd step start b
299          *  start a + stop b  → 1st step stop b,  2nd step start a
300          *  stop a  + start b → 1st step stop a,  2nd step start b
301          *  stop a  + stop b  → 1st step stop b,  2nd step stop a
302          *
303          *  This has the side effect that restarts are properly
304          *  synchronized too. */
305
306         return true;
307 }
308
309 static void job_change_type(Job *j, JobType newtype) {
310         log_debug("Converting job %s/%s -> %s/%s",
311                   j->unit->id, job_type_to_string(j->type),
312                   j->unit->id, job_type_to_string(newtype));
313
314         j->type = newtype;
315 }
316
317 int job_run_and_invalidate(Job *j) {
318         int r;
319         uint32_t id;
320         Manager *m;
321
322         assert(j);
323         assert(j->installed);
324
325         if (j->in_run_queue) {
326                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
327                 j->in_run_queue = false;
328         }
329
330         if (j->state != JOB_WAITING)
331                 return 0;
332
333         if (!job_is_runnable(j))
334                 return -EAGAIN;
335
336         j->state = JOB_RUNNING;
337         job_add_to_dbus_queue(j);
338
339         /* While we execute this operation the job might go away (for
340          * example: because it is replaced by a new, conflicting
341          * job.) To make sure we don't access a freed job later on we
342          * store the id here, so that we can verify the job is still
343          * valid. */
344         id = j->id;
345         m = j->manager;
346
347         switch (j->type) {
348
349                 case JOB_RELOAD_OR_START:
350                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
351                                 job_change_type(j, JOB_RELOAD);
352                                 r = unit_reload(j->unit);
353                                 break;
354                         }
355                         job_change_type(j, JOB_START);
356                         /* fall through */
357
358                 case JOB_START:
359                         r = unit_start(j->unit);
360
361                         /* If this unit cannot be started, then simply wait */
362                         if (r == -EBADR)
363                                 r = 0;
364                         break;
365
366                 case JOB_VERIFY_ACTIVE: {
367                         UnitActiveState t = unit_active_state(j->unit);
368                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
369                                 r = -EALREADY;
370                         else if (t == UNIT_ACTIVATING)
371                                 r = -EAGAIN;
372                         else
373                                 r = -ENOEXEC;
374                         break;
375                 }
376
377                 case JOB_TRY_RESTART:
378                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
379                                 r = -ENOEXEC;
380                                 break;
381                         }
382                         job_change_type(j, JOB_RESTART);
383                         /* fall through */
384
385                 case JOB_STOP:
386                 case JOB_RESTART:
387                         r = unit_stop(j->unit);
388
389                         /* If this unit cannot stopped, then simply wait. */
390                         if (r == -EBADR)
391                                 r = 0;
392                         break;
393
394                 case JOB_RELOAD:
395                         r = unit_reload(j->unit);
396                         break;
397
398                 default:
399                         assert_not_reached("Unknown job type");
400         }
401
402         if ((j = manager_get_job(m, id))) {
403                 if (r == -EALREADY)
404                         r = job_finish_and_invalidate(j, JOB_DONE);
405                 else if (r == -ENOEXEC)
406                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
407                 else if (r == -EAGAIN)
408                         j->state = JOB_WAITING;
409                 else if (r < 0)
410                         r = job_finish_and_invalidate(j, JOB_FAILED);
411         }
412
413         return r;
414 }
415
416 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
417         assert(u);
418
419         if (t == JOB_START) {
420
421                 switch (result) {
422
423                 case JOB_DONE:
424                         if (u->condition_result)
425                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
426                         break;
427
428                 case JOB_FAILED:
429                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
430                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
431                         break;
432
433                 case JOB_DEPENDENCY:
434                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
435                         break;
436
437                 case JOB_TIMEOUT:
438                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
439                         break;
440
441                 default:
442                         ;
443                 }
444
445         } else if (t == JOB_STOP) {
446
447                 switch (result) {
448
449                 case JOB_TIMEOUT:
450                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
451                         break;
452
453                 case JOB_DONE:
454                 case JOB_FAILED:
455                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
456                         break;
457
458                 default:
459                         ;
460                 }
461         }
462 }
463
464 int job_finish_and_invalidate(Job *j, JobResult result) {
465         Unit *u;
466         Unit *other;
467         JobType t;
468         Iterator i;
469         bool recursed = false;
470
471         assert(j);
472         assert(j->installed);
473
474         job_add_to_dbus_queue(j);
475
476         /* Patch restart jobs so that they become normal start jobs */
477         if (result == JOB_DONE && j->type == JOB_RESTART) {
478
479                 job_change_type(j, JOB_START);
480                 j->state = JOB_WAITING;
481
482                 job_add_to_run_queue(j);
483
484                 u = j->unit;
485                 goto finish;
486         }
487
488         j->result = result;
489
490         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
491
492         if (result == JOB_FAILED)
493                 j->manager->n_failed_jobs ++;
494
495         u = j->unit;
496         t = j->type;
497         job_uninstall(j);
498         job_free(j);
499
500         job_print_status_message(u, t, result);
501
502         /* Fail depending jobs on failure */
503         if (result != JOB_DONE) {
504
505                 if (t == JOB_START ||
506                     t == JOB_VERIFY_ACTIVE ||
507                     t == JOB_RELOAD_OR_START) {
508
509                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
510                                 if (other->job &&
511                                     (other->job->type == JOB_START ||
512                                      other->job->type == JOB_VERIFY_ACTIVE ||
513                                      other->job->type == JOB_RELOAD_OR_START)) {
514                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
515                                         recursed = true;
516                                 }
517
518                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
519                                 if (other->job &&
520                                     (other->job->type == JOB_START ||
521                                      other->job->type == JOB_VERIFY_ACTIVE ||
522                                      other->job->type == JOB_RELOAD_OR_START)) {
523                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
524                                         recursed = true;
525                                 }
526
527                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
528                                 if (other->job &&
529                                     !other->job->override &&
530                                     (other->job->type == JOB_START ||
531                                      other->job->type == JOB_VERIFY_ACTIVE ||
532                                      other->job->type == JOB_RELOAD_OR_START)) {
533                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
534                                         recursed = true;
535                                 }
536
537                 } else if (t == JOB_STOP) {
538
539                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
540                                 if (other->job &&
541                                     (other->job->type == JOB_START ||
542                                      other->job->type == JOB_VERIFY_ACTIVE ||
543                                      other->job->type == JOB_RELOAD_OR_START)) {
544                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
545                                         recursed = true;
546                                 }
547                 }
548         }
549
550         /* Trigger OnFailure dependencies that are not generated by
551          * the unit itself. We don't tread JOB_CANCELED as failure in
552          * this context. And JOB_FAILURE is already handled by the
553          * unit itself. */
554         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
555                 log_notice("Job %s/%s failed with result '%s'.",
556                            u->id,
557                            job_type_to_string(t),
558                            job_result_to_string(result));
559
560                 unit_trigger_on_failure(u);
561         }
562
563 finish:
564         /* Try to start the next jobs that can be started */
565         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
566                 if (other->job)
567                         job_add_to_run_queue(other->job);
568         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
569                 if (other->job)
570                         job_add_to_run_queue(other->job);
571
572         manager_check_finished(u->manager);
573
574         return recursed;
575 }
576
577 int job_start_timer(Job *j) {
578         struct itimerspec its;
579         struct epoll_event ev;
580         int fd, r;
581         assert(j);
582
583         if (j->unit->job_timeout <= 0 ||
584             j->timer_watch.type == WATCH_JOB_TIMER)
585                 return 0;
586
587         assert(j->timer_watch.type == WATCH_INVALID);
588
589         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
590                 r = -errno;
591                 goto fail;
592         }
593
594         zero(its);
595         timespec_store(&its.it_value, j->unit->job_timeout);
596
597         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
598                 r = -errno;
599                 goto fail;
600         }
601
602         zero(ev);
603         ev.data.ptr = &j->timer_watch;
604         ev.events = EPOLLIN;
605
606         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
607                 r = -errno;
608                 goto fail;
609         }
610
611         j->timer_watch.type = WATCH_JOB_TIMER;
612         j->timer_watch.fd = fd;
613         j->timer_watch.data.job = j;
614
615         return 0;
616
617 fail:
618         if (fd >= 0)
619                 close_nointr_nofail(fd);
620
621         return r;
622 }
623
624 void job_add_to_run_queue(Job *j) {
625         assert(j);
626         assert(j->installed);
627
628         if (j->in_run_queue)
629                 return;
630
631         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
632         j->in_run_queue = true;
633 }
634
635 void job_add_to_dbus_queue(Job *j) {
636         assert(j);
637         assert(j->installed);
638
639         if (j->in_dbus_queue)
640                 return;
641
642         /* We don't check if anybody is subscribed here, since this
643          * job might just have been created and not yet assigned to a
644          * connection/client. */
645
646         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
647         j->in_dbus_queue = true;
648 }
649
650 char *job_dbus_path(Job *j) {
651         char *p;
652
653         assert(j);
654
655         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
656                 return NULL;
657
658         return p;
659 }
660
661 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
662         assert(j);
663         assert(w == &j->timer_watch);
664
665         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
666         job_finish_and_invalidate(j, JOB_TIMEOUT);
667 }
668
669 static const char* const job_state_table[_JOB_STATE_MAX] = {
670         [JOB_WAITING] = "waiting",
671         [JOB_RUNNING] = "running"
672 };
673
674 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
675
676 static const char* const job_type_table[_JOB_TYPE_MAX] = {
677         [JOB_START] = "start",
678         [JOB_VERIFY_ACTIVE] = "verify-active",
679         [JOB_STOP] = "stop",
680         [JOB_RELOAD] = "reload",
681         [JOB_RELOAD_OR_START] = "reload-or-start",
682         [JOB_RESTART] = "restart",
683         [JOB_TRY_RESTART] = "try-restart",
684 };
685
686 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
687
688 static const char* const job_mode_table[_JOB_MODE_MAX] = {
689         [JOB_FAIL] = "fail",
690         [JOB_REPLACE] = "replace",
691         [JOB_ISOLATE] = "isolate",
692         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
693         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
694 };
695
696 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
697
698 static const char* const job_result_table[_JOB_RESULT_MAX] = {
699         [JOB_DONE] = "done",
700         [JOB_CANCELED] = "canceled",
701         [JOB_TIMEOUT] = "timeout",
702         [JOB_FAILED] = "failed",
703         [JOB_DEPENDENCY] = "dependency",
704         [JOB_SKIPPED] = "skipped"
705 };
706
707 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);