chiark / gitweb /
f3c76d66b59dd85f1acd7948ca290c8ef7c9d0d3
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Manager *m, JobType type, Unit *unit) {
37         Job *j;
38
39         assert(m);
40         assert(type < _JOB_TYPE_MAX);
41         assert(unit);
42
43         if (!(j = new0(Job, 1)))
44                 return NULL;
45
46         j->manager = m;
47         j->id = m->current_job_id++;
48         j->type = type;
49         j->unit = unit;
50
51         j->timer_watch.type = WATCH_INVALID;
52
53         /* We don't link it here, that's what job_dependency() is for */
54
55         return j;
56 }
57
58 void job_free(Job *j) {
59         assert(j);
60
61         /* Detach from next 'bigger' objects */
62         if (j->installed) {
63                 bus_job_send_removed_signal(j);
64
65                 if (j->unit->job == j) {
66                         j->unit->job = NULL;
67                         unit_add_to_gc_queue(j->unit);
68                 }
69
70                 hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
71                 j->installed = false;
72         }
73
74         /* Detach from next 'smaller' objects */
75         manager_transaction_unlink_job(j->manager, j, true);
76
77         if (j->in_run_queue)
78                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
79
80         if (j->in_dbus_queue)
81                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
82
83         if (j->timer_watch.type != WATCH_INVALID) {
84                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
85                 assert(j->timer_watch.data.job == j);
86                 assert(j->timer_watch.fd >= 0);
87
88                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
89                 close_nointr_nofail(j->timer_watch.fd);
90         }
91
92         free(j->bus_client);
93         free(j);
94 }
95
96 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
97         JobDependency *l;
98
99         assert(object);
100
101         /* Adds a new job link, which encodes that the 'subject' job
102          * needs the 'object' job in some way. If 'subject' is NULL
103          * this means the 'anchor' job (i.e. the one the user
104          * explicitly asked for) is the requester. */
105
106         if (!(l = new0(JobDependency, 1)))
107                 return NULL;
108
109         l->subject = subject;
110         l->object = object;
111         l->matters = matters;
112         l->conflicts = conflicts;
113
114         if (subject)
115                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
116         else
117                 LIST_PREPEND(JobDependency, subject, object->manager->transaction_anchor, l);
118
119         LIST_PREPEND(JobDependency, object, object->object_list, l);
120
121         return l;
122 }
123
124 void job_dependency_free(JobDependency *l) {
125         assert(l);
126
127         if (l->subject)
128                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
129         else
130                 LIST_REMOVE(JobDependency, subject, l->object->manager->transaction_anchor, l);
131
132         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
133
134         free(l);
135 }
136
137 void job_dump(Job *j, FILE*f, const char *prefix) {
138         assert(j);
139         assert(f);
140
141         if (!prefix)
142                 prefix = "";
143
144         fprintf(f,
145                 "%s-> Job %u:\n"
146                 "%s\tAction: %s -> %s\n"
147                 "%s\tState: %s\n"
148                 "%s\tForced: %s\n",
149                 prefix, j->id,
150                 prefix, j->unit->id, job_type_to_string(j->type),
151                 prefix, job_state_to_string(j->state),
152                 prefix, yes_no(j->override));
153 }
154
155 bool job_is_anchor(Job *j) {
156         JobDependency *l;
157
158         assert(j);
159
160         LIST_FOREACH(object, l, j->object_list)
161                 if (!l->subject)
162                         return true;
163
164         return false;
165 }
166
167 /*
168  * Merging is commutative, so imagine the matrix as symmetric. We store only
169  * its lower triangle to avoid duplication. We don't store the main diagonal,
170  * because A merged with A is simply A.
171  *
172  * Merging is associative! A merged with B merged with C is the same as
173  * A merged with C merged with B.
174  *
175  * Mergeability is transitive! If A can be merged with B and B with C then
176  * A also with C.
177  *
178  * Also, if A merged with B cannot be merged with C, then either A or B cannot
179  * be merged with C either.
180  */
181 static const JobType job_merging_table[] = {
182 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
183 /************************************************************************************************************************************/
184 /*JOB_START          */
185 /*JOB_VERIFY_ACTIVE  */ JOB_START,
186 /*JOB_STOP           */ -1,                  -1,
187 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
188 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
189 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
190 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
191 };
192
193 JobType job_type_lookup_merge(JobType a, JobType b) {
194         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
195         assert(a >= 0 && a < _JOB_TYPE_MAX);
196         assert(b >= 0 && b < _JOB_TYPE_MAX);
197
198         if (a == b)
199                 return a;
200
201         if (a < b) {
202                 JobType tmp = a;
203                 a = b;
204                 b = tmp;
205         }
206
207         return job_merging_table[(a - 1) * a / 2 + b];
208 }
209
210 bool job_type_is_redundant(JobType a, UnitActiveState b) {
211         switch (a) {
212
213         case JOB_START:
214                 return
215                         b == UNIT_ACTIVE ||
216                         b == UNIT_RELOADING;
217
218         case JOB_STOP:
219                 return
220                         b == UNIT_INACTIVE ||
221                         b == UNIT_FAILED;
222
223         case JOB_VERIFY_ACTIVE:
224                 return
225                         b == UNIT_ACTIVE ||
226                         b == UNIT_RELOADING;
227
228         case JOB_RELOAD:
229                 return
230                         b == UNIT_RELOADING;
231
232         case JOB_RELOAD_OR_START:
233                 return
234                         b == UNIT_ACTIVATING ||
235                         b == UNIT_RELOADING;
236
237         case JOB_RESTART:
238                 return
239                         b == UNIT_ACTIVATING;
240
241         case JOB_TRY_RESTART:
242                 return
243                         b == UNIT_ACTIVATING;
244
245         default:
246                 assert_not_reached("Invalid job type");
247         }
248 }
249
250 bool job_is_runnable(Job *j) {
251         Iterator i;
252         Unit *other;
253
254         assert(j);
255         assert(j->installed);
256
257         /* Checks whether there is any job running for the units this
258          * job needs to be running after (in the case of a 'positive'
259          * job type) or before (in the case of a 'negative' job
260          * type. */
261
262         /* First check if there is an override */
263         if (j->ignore_order)
264                 return true;
265
266         if (j->type == JOB_START ||
267             j->type == JOB_VERIFY_ACTIVE ||
268             j->type == JOB_RELOAD ||
269             j->type == JOB_RELOAD_OR_START) {
270
271                 /* Immediate result is that the job is or might be
272                  * started. In this case lets wait for the
273                  * dependencies, regardless whether they are
274                  * starting or stopping something. */
275
276                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
277                         if (other->job)
278                                 return false;
279         }
280
281         /* Also, if something else is being stopped and we should
282          * change state after it, then lets wait. */
283
284         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
285                 if (other->job &&
286                     (other->job->type == JOB_STOP ||
287                      other->job->type == JOB_RESTART ||
288                      other->job->type == JOB_TRY_RESTART))
289                         return false;
290
291         /* This means that for a service a and a service b where b
292          * shall be started after a:
293          *
294          *  start a + start b → 1st step start a, 2nd step start b
295          *  start a + stop b  → 1st step stop b,  2nd step start a
296          *  stop a  + start b → 1st step stop a,  2nd step start b
297          *  stop a  + stop b  → 1st step stop b,  2nd step stop a
298          *
299          *  This has the side effect that restarts are properly
300          *  synchronized too. */
301
302         return true;
303 }
304
305 static void job_change_type(Job *j, JobType newtype) {
306         log_debug("Converting job %s/%s -> %s/%s",
307                   j->unit->id, job_type_to_string(j->type),
308                   j->unit->id, job_type_to_string(newtype));
309
310         j->type = newtype;
311 }
312
313 int job_run_and_invalidate(Job *j) {
314         int r;
315         uint32_t id;
316         Manager *m;
317
318         assert(j);
319         assert(j->installed);
320
321         if (j->in_run_queue) {
322                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
323                 j->in_run_queue = false;
324         }
325
326         if (j->state != JOB_WAITING)
327                 return 0;
328
329         if (!job_is_runnable(j))
330                 return -EAGAIN;
331
332         j->state = JOB_RUNNING;
333         job_add_to_dbus_queue(j);
334
335         /* While we execute this operation the job might go away (for
336          * example: because it is replaced by a new, conflicting
337          * job.) To make sure we don't access a freed job later on we
338          * store the id here, so that we can verify the job is still
339          * valid. */
340         id = j->id;
341         m = j->manager;
342
343         switch (j->type) {
344
345                 case JOB_RELOAD_OR_START:
346                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
347                                 job_change_type(j, JOB_RELOAD);
348                                 r = unit_reload(j->unit);
349                                 break;
350                         }
351                         job_change_type(j, JOB_START);
352                         /* fall through */
353
354                 case JOB_START:
355                         r = unit_start(j->unit);
356
357                         /* If this unit cannot be started, then simply wait */
358                         if (r == -EBADR)
359                                 r = 0;
360                         break;
361
362                 case JOB_VERIFY_ACTIVE: {
363                         UnitActiveState t = unit_active_state(j->unit);
364                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
365                                 r = -EALREADY;
366                         else if (t == UNIT_ACTIVATING)
367                                 r = -EAGAIN;
368                         else
369                                 r = -ENOEXEC;
370                         break;
371                 }
372
373                 case JOB_TRY_RESTART:
374                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
375                                 r = -ENOEXEC;
376                                 break;
377                         }
378                         job_change_type(j, JOB_RESTART);
379                         /* fall through */
380
381                 case JOB_STOP:
382                 case JOB_RESTART:
383                         r = unit_stop(j->unit);
384
385                         /* If this unit cannot stopped, then simply wait. */
386                         if (r == -EBADR)
387                                 r = 0;
388                         break;
389
390                 case JOB_RELOAD:
391                         r = unit_reload(j->unit);
392                         break;
393
394                 default:
395                         assert_not_reached("Unknown job type");
396         }
397
398         if ((j = manager_get_job(m, id))) {
399                 if (r == -EALREADY)
400                         r = job_finish_and_invalidate(j, JOB_DONE);
401                 else if (r == -ENOEXEC)
402                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
403                 else if (r == -EAGAIN)
404                         j->state = JOB_WAITING;
405                 else if (r < 0)
406                         r = job_finish_and_invalidate(j, JOB_FAILED);
407         }
408
409         return r;
410 }
411
412 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
413         assert(u);
414
415         if (t == JOB_START) {
416
417                 switch (result) {
418
419                 case JOB_DONE:
420                         if (u->condition_result)
421                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
422                         break;
423
424                 case JOB_FAILED:
425                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
426                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
427                         break;
428
429                 case JOB_DEPENDENCY:
430                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
431                         break;
432
433                 case JOB_TIMEOUT:
434                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
435                         break;
436
437                 default:
438                         ;
439                 }
440
441         } else if (t == JOB_STOP) {
442
443                 switch (result) {
444
445                 case JOB_TIMEOUT:
446                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
447                         break;
448
449                 case JOB_DONE:
450                 case JOB_FAILED:
451                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
452                         break;
453
454                 default:
455                         ;
456                 }
457         }
458 }
459
460 int job_finish_and_invalidate(Job *j, JobResult result) {
461         Unit *u;
462         Unit *other;
463         JobType t;
464         Iterator i;
465         bool recursed = false;
466
467         assert(j);
468         assert(j->installed);
469
470         job_add_to_dbus_queue(j);
471
472         /* Patch restart jobs so that they become normal start jobs */
473         if (result == JOB_DONE && j->type == JOB_RESTART) {
474
475                 job_change_type(j, JOB_START);
476                 j->state = JOB_WAITING;
477
478                 job_add_to_run_queue(j);
479
480                 u = j->unit;
481                 goto finish;
482         }
483
484         j->result = result;
485
486         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
487
488         if (result == JOB_FAILED)
489                 j->manager->n_failed_jobs ++;
490
491         u = j->unit;
492         t = j->type;
493         job_free(j);
494
495         job_print_status_message(u, t, result);
496
497         /* Fail depending jobs on failure */
498         if (result != JOB_DONE) {
499
500                 if (t == JOB_START ||
501                     t == JOB_VERIFY_ACTIVE ||
502                     t == JOB_RELOAD_OR_START) {
503
504                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
505                                 if (other->job &&
506                                     (other->job->type == JOB_START ||
507                                      other->job->type == JOB_VERIFY_ACTIVE ||
508                                      other->job->type == JOB_RELOAD_OR_START)) {
509                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
510                                         recursed = true;
511                                 }
512
513                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
514                                 if (other->job &&
515                                     (other->job->type == JOB_START ||
516                                      other->job->type == JOB_VERIFY_ACTIVE ||
517                                      other->job->type == JOB_RELOAD_OR_START)) {
518                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
519                                         recursed = true;
520                                 }
521
522                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
523                                 if (other->job &&
524                                     !other->job->override &&
525                                     (other->job->type == JOB_START ||
526                                      other->job->type == JOB_VERIFY_ACTIVE ||
527                                      other->job->type == JOB_RELOAD_OR_START)) {
528                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
529                                         recursed = true;
530                                 }
531
532                 } else if (t == JOB_STOP) {
533
534                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
535                                 if (other->job &&
536                                     (other->job->type == JOB_START ||
537                                      other->job->type == JOB_VERIFY_ACTIVE ||
538                                      other->job->type == JOB_RELOAD_OR_START)) {
539                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
540                                         recursed = true;
541                                 }
542                 }
543         }
544
545         /* Trigger OnFailure dependencies that are not generated by
546          * the unit itself. We don't tread JOB_CANCELED as failure in
547          * this context. And JOB_FAILURE is already handled by the
548          * unit itself. */
549         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
550                 log_notice("Job %s/%s failed with result '%s'.",
551                            u->id,
552                            job_type_to_string(t),
553                            job_result_to_string(result));
554
555                 unit_trigger_on_failure(u);
556         }
557
558 finish:
559         /* Try to start the next jobs that can be started */
560         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
561                 if (other->job)
562                         job_add_to_run_queue(other->job);
563         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
564                 if (other->job)
565                         job_add_to_run_queue(other->job);
566
567         manager_check_finished(u->manager);
568
569         return recursed;
570 }
571
572 int job_start_timer(Job *j) {
573         struct itimerspec its;
574         struct epoll_event ev;
575         int fd, r;
576         assert(j);
577
578         if (j->unit->job_timeout <= 0 ||
579             j->timer_watch.type == WATCH_JOB_TIMER)
580                 return 0;
581
582         assert(j->timer_watch.type == WATCH_INVALID);
583
584         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
585                 r = -errno;
586                 goto fail;
587         }
588
589         zero(its);
590         timespec_store(&its.it_value, j->unit->job_timeout);
591
592         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
593                 r = -errno;
594                 goto fail;
595         }
596
597         zero(ev);
598         ev.data.ptr = &j->timer_watch;
599         ev.events = EPOLLIN;
600
601         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
602                 r = -errno;
603                 goto fail;
604         }
605
606         j->timer_watch.type = WATCH_JOB_TIMER;
607         j->timer_watch.fd = fd;
608         j->timer_watch.data.job = j;
609
610         return 0;
611
612 fail:
613         if (fd >= 0)
614                 close_nointr_nofail(fd);
615
616         return r;
617 }
618
619 void job_add_to_run_queue(Job *j) {
620         assert(j);
621         assert(j->installed);
622
623         if (j->in_run_queue)
624                 return;
625
626         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
627         j->in_run_queue = true;
628 }
629
630 void job_add_to_dbus_queue(Job *j) {
631         assert(j);
632         assert(j->installed);
633
634         if (j->in_dbus_queue)
635                 return;
636
637         /* We don't check if anybody is subscribed here, since this
638          * job might just have been created and not yet assigned to a
639          * connection/client. */
640
641         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
642         j->in_dbus_queue = true;
643 }
644
645 char *job_dbus_path(Job *j) {
646         char *p;
647
648         assert(j);
649
650         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
651                 return NULL;
652
653         return p;
654 }
655
656 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
657         assert(j);
658         assert(w == &j->timer_watch);
659
660         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
661         job_finish_and_invalidate(j, JOB_TIMEOUT);
662 }
663
664 static const char* const job_state_table[_JOB_STATE_MAX] = {
665         [JOB_WAITING] = "waiting",
666         [JOB_RUNNING] = "running"
667 };
668
669 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
670
671 static const char* const job_type_table[_JOB_TYPE_MAX] = {
672         [JOB_START] = "start",
673         [JOB_VERIFY_ACTIVE] = "verify-active",
674         [JOB_STOP] = "stop",
675         [JOB_RELOAD] = "reload",
676         [JOB_RELOAD_OR_START] = "reload-or-start",
677         [JOB_RESTART] = "restart",
678         [JOB_TRY_RESTART] = "try-restart",
679 };
680
681 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
682
683 static const char* const job_mode_table[_JOB_MODE_MAX] = {
684         [JOB_FAIL] = "fail",
685         [JOB_REPLACE] = "replace",
686         [JOB_ISOLATE] = "isolate",
687         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
688         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
689 };
690
691 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
692
693 static const char* const job_result_table[_JOB_RESULT_MAX] = {
694         [JOB_DONE] = "done",
695         [JOB_CANCELED] = "canceled",
696         [JOB_TIMEOUT] = "timeout",
697         [JOB_FAILED] = "failed",
698         [JOB_DEPENDENCY] = "dependency",
699         [JOB_SKIPPED] = "skipped"
700 };
701
702 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);