chiark / gitweb /
job: separate job_install()
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Unit *unit, JobType type) {
37         Job *j;
38
39         assert(type < _JOB_TYPE_MAX);
40         assert(unit);
41
42         if (!(j = new0(Job, 1)))
43                 return NULL;
44
45         j->manager = unit->manager;
46         j->id = j->manager->current_job_id++;
47         j->type = type;
48         j->unit = unit;
49
50         j->timer_watch.type = WATCH_INVALID;
51
52         /* We don't link it here, that's what job_dependency() is for */
53
54         return j;
55 }
56
57 void job_free(Job *j) {
58         assert(j);
59         assert(!j->installed);
60         assert(!j->transaction_prev);
61         assert(!j->transaction_next);
62         assert(!j->subject_list);
63         assert(!j->object_list);
64
65         if (j->in_run_queue)
66                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
67
68         if (j->in_dbus_queue)
69                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
70
71         if (j->timer_watch.type != WATCH_INVALID) {
72                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
73                 assert(j->timer_watch.data.job == j);
74                 assert(j->timer_watch.fd >= 0);
75
76                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
77                 close_nointr_nofail(j->timer_watch.fd);
78         }
79
80         free(j->bus_client);
81         free(j);
82 }
83
84 void job_uninstall(Job *j) {
85         assert(j->installed);
86         /* Detach from next 'bigger' objects */
87
88         bus_job_send_removed_signal(j);
89
90         if (j->unit->job == j) {
91                 j->unit->job = NULL;
92                 unit_add_to_gc_queue(j->unit);
93         }
94
95         hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
96         j->installed = false;
97 }
98
99 void job_install(Job *j) {
100         Job *uj = j->unit->job;
101
102         if (uj) {
103                 job_uninstall(uj);
104                 job_free(uj);
105         }
106
107         j->unit->job = j;
108         j->installed = true;
109         j->manager->n_installed_jobs ++;
110         log_debug("Installed new job %s/%s as %u", j->unit->id, job_type_to_string(j->type), (unsigned) j->id);
111 }
112
113 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
114         JobDependency *l;
115
116         assert(object);
117
118         /* Adds a new job link, which encodes that the 'subject' job
119          * needs the 'object' job in some way. If 'subject' is NULL
120          * this means the 'anchor' job (i.e. the one the user
121          * explicitly asked for) is the requester. */
122
123         if (!(l = new0(JobDependency, 1)))
124                 return NULL;
125
126         l->subject = subject;
127         l->object = object;
128         l->matters = matters;
129         l->conflicts = conflicts;
130
131         if (subject)
132                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
133
134         LIST_PREPEND(JobDependency, object, object->object_list, l);
135
136         return l;
137 }
138
139 void job_dependency_free(JobDependency *l) {
140         assert(l);
141
142         if (l->subject)
143                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
144
145         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
146
147         free(l);
148 }
149
150 void job_dump(Job *j, FILE*f, const char *prefix) {
151         assert(j);
152         assert(f);
153
154         if (!prefix)
155                 prefix = "";
156
157         fprintf(f,
158                 "%s-> Job %u:\n"
159                 "%s\tAction: %s -> %s\n"
160                 "%s\tState: %s\n"
161                 "%s\tForced: %s\n",
162                 prefix, j->id,
163                 prefix, j->unit->id, job_type_to_string(j->type),
164                 prefix, job_state_to_string(j->state),
165                 prefix, yes_no(j->override));
166 }
167
168 /*
169  * Merging is commutative, so imagine the matrix as symmetric. We store only
170  * its lower triangle to avoid duplication. We don't store the main diagonal,
171  * because A merged with A is simply A.
172  *
173  * Merging is associative! A merged with B merged with C is the same as
174  * A merged with C merged with B.
175  *
176  * Mergeability is transitive! If A can be merged with B and B with C then
177  * A also with C.
178  *
179  * Also, if A merged with B cannot be merged with C, then either A or B cannot
180  * be merged with C either.
181  */
182 static const JobType job_merging_table[] = {
183 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
184 /************************************************************************************************************************************/
185 /*JOB_START          */
186 /*JOB_VERIFY_ACTIVE  */ JOB_START,
187 /*JOB_STOP           */ -1,                  -1,
188 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
189 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
190 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
191 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
192 };
193
194 JobType job_type_lookup_merge(JobType a, JobType b) {
195         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
196         assert(a >= 0 && a < _JOB_TYPE_MAX);
197         assert(b >= 0 && b < _JOB_TYPE_MAX);
198
199         if (a == b)
200                 return a;
201
202         if (a < b) {
203                 JobType tmp = a;
204                 a = b;
205                 b = tmp;
206         }
207
208         return job_merging_table[(a - 1) * a / 2 + b];
209 }
210
211 bool job_type_is_redundant(JobType a, UnitActiveState b) {
212         switch (a) {
213
214         case JOB_START:
215                 return
216                         b == UNIT_ACTIVE ||
217                         b == UNIT_RELOADING;
218
219         case JOB_STOP:
220                 return
221                         b == UNIT_INACTIVE ||
222                         b == UNIT_FAILED;
223
224         case JOB_VERIFY_ACTIVE:
225                 return
226                         b == UNIT_ACTIVE ||
227                         b == UNIT_RELOADING;
228
229         case JOB_RELOAD:
230                 return
231                         b == UNIT_RELOADING;
232
233         case JOB_RELOAD_OR_START:
234                 return
235                         b == UNIT_ACTIVATING ||
236                         b == UNIT_RELOADING;
237
238         case JOB_RESTART:
239                 return
240                         b == UNIT_ACTIVATING;
241
242         case JOB_TRY_RESTART:
243                 return
244                         b == UNIT_ACTIVATING;
245
246         default:
247                 assert_not_reached("Invalid job type");
248         }
249 }
250
251 bool job_is_runnable(Job *j) {
252         Iterator i;
253         Unit *other;
254
255         assert(j);
256         assert(j->installed);
257
258         /* Checks whether there is any job running for the units this
259          * job needs to be running after (in the case of a 'positive'
260          * job type) or before (in the case of a 'negative' job
261          * type. */
262
263         /* First check if there is an override */
264         if (j->ignore_order)
265                 return true;
266
267         if (j->type == JOB_START ||
268             j->type == JOB_VERIFY_ACTIVE ||
269             j->type == JOB_RELOAD ||
270             j->type == JOB_RELOAD_OR_START) {
271
272                 /* Immediate result is that the job is or might be
273                  * started. In this case lets wait for the
274                  * dependencies, regardless whether they are
275                  * starting or stopping something. */
276
277                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
278                         if (other->job)
279                                 return false;
280         }
281
282         /* Also, if something else is being stopped and we should
283          * change state after it, then lets wait. */
284
285         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
286                 if (other->job &&
287                     (other->job->type == JOB_STOP ||
288                      other->job->type == JOB_RESTART ||
289                      other->job->type == JOB_TRY_RESTART))
290                         return false;
291
292         /* This means that for a service a and a service b where b
293          * shall be started after a:
294          *
295          *  start a + start b â†’ 1st step start a, 2nd step start b
296          *  start a + stop b  â†’ 1st step stop b,  2nd step start a
297          *  stop a  + start b â†’ 1st step stop a,  2nd step start b
298          *  stop a  + stop b  â†’ 1st step stop b,  2nd step stop a
299          *
300          *  This has the side effect that restarts are properly
301          *  synchronized too. */
302
303         return true;
304 }
305
306 static void job_change_type(Job *j, JobType newtype) {
307         log_debug("Converting job %s/%s -> %s/%s",
308                   j->unit->id, job_type_to_string(j->type),
309                   j->unit->id, job_type_to_string(newtype));
310
311         j->type = newtype;
312 }
313
314 int job_run_and_invalidate(Job *j) {
315         int r;
316         uint32_t id;
317         Manager *m;
318
319         assert(j);
320         assert(j->installed);
321
322         if (j->in_run_queue) {
323                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
324                 j->in_run_queue = false;
325         }
326
327         if (j->state != JOB_WAITING)
328                 return 0;
329
330         if (!job_is_runnable(j))
331                 return -EAGAIN;
332
333         j->state = JOB_RUNNING;
334         job_add_to_dbus_queue(j);
335
336         /* While we execute this operation the job might go away (for
337          * example: because it is replaced by a new, conflicting
338          * job.) To make sure we don't access a freed job later on we
339          * store the id here, so that we can verify the job is still
340          * valid. */
341         id = j->id;
342         m = j->manager;
343
344         switch (j->type) {
345
346                 case JOB_RELOAD_OR_START:
347                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
348                                 job_change_type(j, JOB_RELOAD);
349                                 r = unit_reload(j->unit);
350                                 break;
351                         }
352                         job_change_type(j, JOB_START);
353                         /* fall through */
354
355                 case JOB_START:
356                         r = unit_start(j->unit);
357
358                         /* If this unit cannot be started, then simply wait */
359                         if (r == -EBADR)
360                                 r = 0;
361                         break;
362
363                 case JOB_VERIFY_ACTIVE: {
364                         UnitActiveState t = unit_active_state(j->unit);
365                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
366                                 r = -EALREADY;
367                         else if (t == UNIT_ACTIVATING)
368                                 r = -EAGAIN;
369                         else
370                                 r = -ENOEXEC;
371                         break;
372                 }
373
374                 case JOB_TRY_RESTART:
375                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
376                                 r = -ENOEXEC;
377                                 break;
378                         }
379                         job_change_type(j, JOB_RESTART);
380                         /* fall through */
381
382                 case JOB_STOP:
383                 case JOB_RESTART:
384                         r = unit_stop(j->unit);
385
386                         /* If this unit cannot stopped, then simply wait. */
387                         if (r == -EBADR)
388                                 r = 0;
389                         break;
390
391                 case JOB_RELOAD:
392                         r = unit_reload(j->unit);
393                         break;
394
395                 default:
396                         assert_not_reached("Unknown job type");
397         }
398
399         if ((j = manager_get_job(m, id))) {
400                 if (r == -EALREADY)
401                         r = job_finish_and_invalidate(j, JOB_DONE);
402                 else if (r == -ENOEXEC)
403                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
404                 else if (r == -EAGAIN)
405                         j->state = JOB_WAITING;
406                 else if (r < 0)
407                         r = job_finish_and_invalidate(j, JOB_FAILED);
408         }
409
410         return r;
411 }
412
413 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
414         assert(u);
415
416         if (t == JOB_START) {
417
418                 switch (result) {
419
420                 case JOB_DONE:
421                         if (u->condition_result)
422                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
423                         break;
424
425                 case JOB_FAILED:
426                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
427                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
428                         break;
429
430                 case JOB_DEPENDENCY:
431                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
432                         break;
433
434                 case JOB_TIMEOUT:
435                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
436                         break;
437
438                 default:
439                         ;
440                 }
441
442         } else if (t == JOB_STOP) {
443
444                 switch (result) {
445
446                 case JOB_TIMEOUT:
447                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
448                         break;
449
450                 case JOB_DONE:
451                 case JOB_FAILED:
452                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
453                         break;
454
455                 default:
456                         ;
457                 }
458         }
459 }
460
461 int job_finish_and_invalidate(Job *j, JobResult result) {
462         Unit *u;
463         Unit *other;
464         JobType t;
465         Iterator i;
466         bool recursed = false;
467
468         assert(j);
469         assert(j->installed);
470
471         job_add_to_dbus_queue(j);
472
473         /* Patch restart jobs so that they become normal start jobs */
474         if (result == JOB_DONE && j->type == JOB_RESTART) {
475
476                 job_change_type(j, JOB_START);
477                 j->state = JOB_WAITING;
478
479                 job_add_to_run_queue(j);
480
481                 u = j->unit;
482                 goto finish;
483         }
484
485         j->result = result;
486
487         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
488
489         if (result == JOB_FAILED)
490                 j->manager->n_failed_jobs ++;
491
492         u = j->unit;
493         t = j->type;
494         job_uninstall(j);
495         job_free(j);
496
497         job_print_status_message(u, t, result);
498
499         /* Fail depending jobs on failure */
500         if (result != JOB_DONE) {
501
502                 if (t == JOB_START ||
503                     t == JOB_VERIFY_ACTIVE ||
504                     t == JOB_RELOAD_OR_START) {
505
506                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
507                                 if (other->job &&
508                                     (other->job->type == JOB_START ||
509                                      other->job->type == JOB_VERIFY_ACTIVE ||
510                                      other->job->type == JOB_RELOAD_OR_START)) {
511                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
512                                         recursed = true;
513                                 }
514
515                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
516                                 if (other->job &&
517                                     (other->job->type == JOB_START ||
518                                      other->job->type == JOB_VERIFY_ACTIVE ||
519                                      other->job->type == JOB_RELOAD_OR_START)) {
520                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
521                                         recursed = true;
522                                 }
523
524                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
525                                 if (other->job &&
526                                     !other->job->override &&
527                                     (other->job->type == JOB_START ||
528                                      other->job->type == JOB_VERIFY_ACTIVE ||
529                                      other->job->type == JOB_RELOAD_OR_START)) {
530                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
531                                         recursed = true;
532                                 }
533
534                 } else if (t == JOB_STOP) {
535
536                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
537                                 if (other->job &&
538                                     (other->job->type == JOB_START ||
539                                      other->job->type == JOB_VERIFY_ACTIVE ||
540                                      other->job->type == JOB_RELOAD_OR_START)) {
541                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
542                                         recursed = true;
543                                 }
544                 }
545         }
546
547         /* Trigger OnFailure dependencies that are not generated by
548          * the unit itself. We don't tread JOB_CANCELED as failure in
549          * this context. And JOB_FAILURE is already handled by the
550          * unit itself. */
551         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
552                 log_notice("Job %s/%s failed with result '%s'.",
553                            u->id,
554                            job_type_to_string(t),
555                            job_result_to_string(result));
556
557                 unit_trigger_on_failure(u);
558         }
559
560 finish:
561         /* Try to start the next jobs that can be started */
562         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
563                 if (other->job)
564                         job_add_to_run_queue(other->job);
565         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
566                 if (other->job)
567                         job_add_to_run_queue(other->job);
568
569         manager_check_finished(u->manager);
570
571         return recursed;
572 }
573
574 int job_start_timer(Job *j) {
575         struct itimerspec its;
576         struct epoll_event ev;
577         int fd, r;
578         assert(j);
579
580         if (j->unit->job_timeout <= 0 ||
581             j->timer_watch.type == WATCH_JOB_TIMER)
582                 return 0;
583
584         assert(j->timer_watch.type == WATCH_INVALID);
585
586         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
587                 r = -errno;
588                 goto fail;
589         }
590
591         zero(its);
592         timespec_store(&its.it_value, j->unit->job_timeout);
593
594         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
595                 r = -errno;
596                 goto fail;
597         }
598
599         zero(ev);
600         ev.data.ptr = &j->timer_watch;
601         ev.events = EPOLLIN;
602
603         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
604                 r = -errno;
605                 goto fail;
606         }
607
608         j->timer_watch.type = WATCH_JOB_TIMER;
609         j->timer_watch.fd = fd;
610         j->timer_watch.data.job = j;
611
612         return 0;
613
614 fail:
615         if (fd >= 0)
616                 close_nointr_nofail(fd);
617
618         return r;
619 }
620
621 void job_add_to_run_queue(Job *j) {
622         assert(j);
623         assert(j->installed);
624
625         if (j->in_run_queue)
626                 return;
627
628         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
629         j->in_run_queue = true;
630 }
631
632 void job_add_to_dbus_queue(Job *j) {
633         assert(j);
634         assert(j->installed);
635
636         if (j->in_dbus_queue)
637                 return;
638
639         /* We don't check if anybody is subscribed here, since this
640          * job might just have been created and not yet assigned to a
641          * connection/client. */
642
643         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
644         j->in_dbus_queue = true;
645 }
646
647 char *job_dbus_path(Job *j) {
648         char *p;
649
650         assert(j);
651
652         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
653                 return NULL;
654
655         return p;
656 }
657
658 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
659         assert(j);
660         assert(w == &j->timer_watch);
661
662         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
663         job_finish_and_invalidate(j, JOB_TIMEOUT);
664 }
665
666 static const char* const job_state_table[_JOB_STATE_MAX] = {
667         [JOB_WAITING] = "waiting",
668         [JOB_RUNNING] = "running"
669 };
670
671 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
672
673 static const char* const job_type_table[_JOB_TYPE_MAX] = {
674         [JOB_START] = "start",
675         [JOB_VERIFY_ACTIVE] = "verify-active",
676         [JOB_STOP] = "stop",
677         [JOB_RELOAD] = "reload",
678         [JOB_RELOAD_OR_START] = "reload-or-start",
679         [JOB_RESTART] = "restart",
680         [JOB_TRY_RESTART] = "try-restart",
681 };
682
683 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
684
685 static const char* const job_mode_table[_JOB_MODE_MAX] = {
686         [JOB_FAIL] = "fail",
687         [JOB_REPLACE] = "replace",
688         [JOB_ISOLATE] = "isolate",
689         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
690         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
691 };
692
693 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
694
695 static const char* const job_result_table[_JOB_RESULT_MAX] = {
696         [JOB_DONE] = "done",
697         [JOB_CANCELED] = "canceled",
698         [JOB_TIMEOUT] = "timeout",
699         [JOB_FAILED] = "failed",
700         [JOB_DEPENDENCY] = "dependency",
701         [JOB_SKIPPED] = "skipped"
702 };
703
704 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);