chiark / gitweb /
aa7cdbff2add4c3522c38641380d6dc95ea69521
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Unit *unit, JobType type) {
37         Job *j;
38
39         assert(type < _JOB_TYPE_MAX);
40         assert(unit);
41
42         if (!(j = new0(Job, 1)))
43                 return NULL;
44
45         j->manager = unit->manager;
46         j->id = j->manager->current_job_id++;
47         j->type = type;
48         j->unit = unit;
49
50         j->timer_watch.type = WATCH_INVALID;
51
52         /* We don't link it here, that's what job_dependency() is for */
53
54         return j;
55 }
56
57 void job_uninstall(Job *j) {
58         assert(j->installed);
59         /* Detach from next 'bigger' objects */
60
61         bus_job_send_removed_signal(j);
62
63         if (j->unit->job == j) {
64                 j->unit->job = NULL;
65                 unit_add_to_gc_queue(j->unit);
66         }
67
68         hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
69         j->installed = false;
70 }
71
72 void job_free(Job *j) {
73         assert(j);
74         assert(!j->installed);
75         assert(!j->transaction_prev);
76         assert(!j->transaction_next);
77         assert(!j->subject_list);
78         assert(!j->object_list);
79
80         if (j->in_run_queue)
81                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
82
83         if (j->in_dbus_queue)
84                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
85
86         if (j->timer_watch.type != WATCH_INVALID) {
87                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
88                 assert(j->timer_watch.data.job == j);
89                 assert(j->timer_watch.fd >= 0);
90
91                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
92                 close_nointr_nofail(j->timer_watch.fd);
93         }
94
95         free(j->bus_client);
96         free(j);
97 }
98
99 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
100         JobDependency *l;
101
102         assert(object);
103
104         /* Adds a new job link, which encodes that the 'subject' job
105          * needs the 'object' job in some way. If 'subject' is NULL
106          * this means the 'anchor' job (i.e. the one the user
107          * explicitly asked for) is the requester. */
108
109         if (!(l = new0(JobDependency, 1)))
110                 return NULL;
111
112         l->subject = subject;
113         l->object = object;
114         l->matters = matters;
115         l->conflicts = conflicts;
116
117         if (subject)
118                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
119
120         LIST_PREPEND(JobDependency, object, object->object_list, l);
121
122         return l;
123 }
124
125 void job_dependency_free(JobDependency *l) {
126         assert(l);
127
128         if (l->subject)
129                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
130
131         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
132
133         free(l);
134 }
135
136 void job_dump(Job *j, FILE*f, const char *prefix) {
137         assert(j);
138         assert(f);
139
140         if (!prefix)
141                 prefix = "";
142
143         fprintf(f,
144                 "%s-> Job %u:\n"
145                 "%s\tAction: %s -> %s\n"
146                 "%s\tState: %s\n"
147                 "%s\tForced: %s\n",
148                 prefix, j->id,
149                 prefix, j->unit->id, job_type_to_string(j->type),
150                 prefix, job_state_to_string(j->state),
151                 prefix, yes_no(j->override));
152 }
153
154 /*
155  * Merging is commutative, so imagine the matrix as symmetric. We store only
156  * its lower triangle to avoid duplication. We don't store the main diagonal,
157  * because A merged with A is simply A.
158  *
159  * Merging is associative! A merged with B merged with C is the same as
160  * A merged with C merged with B.
161  *
162  * Mergeability is transitive! If A can be merged with B and B with C then
163  * A also with C.
164  *
165  * Also, if A merged with B cannot be merged with C, then either A or B cannot
166  * be merged with C either.
167  */
168 static const JobType job_merging_table[] = {
169 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
170 /************************************************************************************************************************************/
171 /*JOB_START          */
172 /*JOB_VERIFY_ACTIVE  */ JOB_START,
173 /*JOB_STOP           */ -1,                  -1,
174 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
175 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
176 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
177 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
178 };
179
180 JobType job_type_lookup_merge(JobType a, JobType b) {
181         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
182         assert(a >= 0 && a < _JOB_TYPE_MAX);
183         assert(b >= 0 && b < _JOB_TYPE_MAX);
184
185         if (a == b)
186                 return a;
187
188         if (a < b) {
189                 JobType tmp = a;
190                 a = b;
191                 b = tmp;
192         }
193
194         return job_merging_table[(a - 1) * a / 2 + b];
195 }
196
197 bool job_type_is_redundant(JobType a, UnitActiveState b) {
198         switch (a) {
199
200         case JOB_START:
201                 return
202                         b == UNIT_ACTIVE ||
203                         b == UNIT_RELOADING;
204
205         case JOB_STOP:
206                 return
207                         b == UNIT_INACTIVE ||
208                         b == UNIT_FAILED;
209
210         case JOB_VERIFY_ACTIVE:
211                 return
212                         b == UNIT_ACTIVE ||
213                         b == UNIT_RELOADING;
214
215         case JOB_RELOAD:
216                 return
217                         b == UNIT_RELOADING;
218
219         case JOB_RELOAD_OR_START:
220                 return
221                         b == UNIT_ACTIVATING ||
222                         b == UNIT_RELOADING;
223
224         case JOB_RESTART:
225                 return
226                         b == UNIT_ACTIVATING;
227
228         case JOB_TRY_RESTART:
229                 return
230                         b == UNIT_ACTIVATING;
231
232         default:
233                 assert_not_reached("Invalid job type");
234         }
235 }
236
237 bool job_is_runnable(Job *j) {
238         Iterator i;
239         Unit *other;
240
241         assert(j);
242         assert(j->installed);
243
244         /* Checks whether there is any job running for the units this
245          * job needs to be running after (in the case of a 'positive'
246          * job type) or before (in the case of a 'negative' job
247          * type. */
248
249         /* First check if there is an override */
250         if (j->ignore_order)
251                 return true;
252
253         if (j->type == JOB_START ||
254             j->type == JOB_VERIFY_ACTIVE ||
255             j->type == JOB_RELOAD ||
256             j->type == JOB_RELOAD_OR_START) {
257
258                 /* Immediate result is that the job is or might be
259                  * started. In this case lets wait for the
260                  * dependencies, regardless whether they are
261                  * starting or stopping something. */
262
263                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
264                         if (other->job)
265                                 return false;
266         }
267
268         /* Also, if something else is being stopped and we should
269          * change state after it, then lets wait. */
270
271         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
272                 if (other->job &&
273                     (other->job->type == JOB_STOP ||
274                      other->job->type == JOB_RESTART ||
275                      other->job->type == JOB_TRY_RESTART))
276                         return false;
277
278         /* This means that for a service a and a service b where b
279          * shall be started after a:
280          *
281          *  start a + start b â†’ 1st step start a, 2nd step start b
282          *  start a + stop b  â†’ 1st step stop b,  2nd step start a
283          *  stop a  + start b â†’ 1st step stop a,  2nd step start b
284          *  stop a  + stop b  â†’ 1st step stop b,  2nd step stop a
285          *
286          *  This has the side effect that restarts are properly
287          *  synchronized too. */
288
289         return true;
290 }
291
292 static void job_change_type(Job *j, JobType newtype) {
293         log_debug("Converting job %s/%s -> %s/%s",
294                   j->unit->id, job_type_to_string(j->type),
295                   j->unit->id, job_type_to_string(newtype));
296
297         j->type = newtype;
298 }
299
300 int job_run_and_invalidate(Job *j) {
301         int r;
302         uint32_t id;
303         Manager *m;
304
305         assert(j);
306         assert(j->installed);
307
308         if (j->in_run_queue) {
309                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
310                 j->in_run_queue = false;
311         }
312
313         if (j->state != JOB_WAITING)
314                 return 0;
315
316         if (!job_is_runnable(j))
317                 return -EAGAIN;
318
319         j->state = JOB_RUNNING;
320         job_add_to_dbus_queue(j);
321
322         /* While we execute this operation the job might go away (for
323          * example: because it is replaced by a new, conflicting
324          * job.) To make sure we don't access a freed job later on we
325          * store the id here, so that we can verify the job is still
326          * valid. */
327         id = j->id;
328         m = j->manager;
329
330         switch (j->type) {
331
332                 case JOB_RELOAD_OR_START:
333                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
334                                 job_change_type(j, JOB_RELOAD);
335                                 r = unit_reload(j->unit);
336                                 break;
337                         }
338                         job_change_type(j, JOB_START);
339                         /* fall through */
340
341                 case JOB_START:
342                         r = unit_start(j->unit);
343
344                         /* If this unit cannot be started, then simply wait */
345                         if (r == -EBADR)
346                                 r = 0;
347                         break;
348
349                 case JOB_VERIFY_ACTIVE: {
350                         UnitActiveState t = unit_active_state(j->unit);
351                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
352                                 r = -EALREADY;
353                         else if (t == UNIT_ACTIVATING)
354                                 r = -EAGAIN;
355                         else
356                                 r = -ENOEXEC;
357                         break;
358                 }
359
360                 case JOB_TRY_RESTART:
361                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
362                                 r = -ENOEXEC;
363                                 break;
364                         }
365                         job_change_type(j, JOB_RESTART);
366                         /* fall through */
367
368                 case JOB_STOP:
369                 case JOB_RESTART:
370                         r = unit_stop(j->unit);
371
372                         /* If this unit cannot stopped, then simply wait. */
373                         if (r == -EBADR)
374                                 r = 0;
375                         break;
376
377                 case JOB_RELOAD:
378                         r = unit_reload(j->unit);
379                         break;
380
381                 default:
382                         assert_not_reached("Unknown job type");
383         }
384
385         if ((j = manager_get_job(m, id))) {
386                 if (r == -EALREADY)
387                         r = job_finish_and_invalidate(j, JOB_DONE);
388                 else if (r == -ENOEXEC)
389                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
390                 else if (r == -EAGAIN)
391                         j->state = JOB_WAITING;
392                 else if (r < 0)
393                         r = job_finish_and_invalidate(j, JOB_FAILED);
394         }
395
396         return r;
397 }
398
399 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
400         assert(u);
401
402         if (t == JOB_START) {
403
404                 switch (result) {
405
406                 case JOB_DONE:
407                         if (u->condition_result)
408                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
409                         break;
410
411                 case JOB_FAILED:
412                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
413                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
414                         break;
415
416                 case JOB_DEPENDENCY:
417                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
418                         break;
419
420                 case JOB_TIMEOUT:
421                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
422                         break;
423
424                 default:
425                         ;
426                 }
427
428         } else if (t == JOB_STOP) {
429
430                 switch (result) {
431
432                 case JOB_TIMEOUT:
433                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
434                         break;
435
436                 case JOB_DONE:
437                 case JOB_FAILED:
438                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
439                         break;
440
441                 default:
442                         ;
443                 }
444         }
445 }
446
447 int job_finish_and_invalidate(Job *j, JobResult result) {
448         Unit *u;
449         Unit *other;
450         JobType t;
451         Iterator i;
452         bool recursed = false;
453
454         assert(j);
455         assert(j->installed);
456
457         job_add_to_dbus_queue(j);
458
459         /* Patch restart jobs so that they become normal start jobs */
460         if (result == JOB_DONE && j->type == JOB_RESTART) {
461
462                 job_change_type(j, JOB_START);
463                 j->state = JOB_WAITING;
464
465                 job_add_to_run_queue(j);
466
467                 u = j->unit;
468                 goto finish;
469         }
470
471         j->result = result;
472
473         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
474
475         if (result == JOB_FAILED)
476                 j->manager->n_failed_jobs ++;
477
478         u = j->unit;
479         t = j->type;
480         job_uninstall(j);
481         job_free(j);
482
483         job_print_status_message(u, t, result);
484
485         /* Fail depending jobs on failure */
486         if (result != JOB_DONE) {
487
488                 if (t == JOB_START ||
489                     t == JOB_VERIFY_ACTIVE ||
490                     t == JOB_RELOAD_OR_START) {
491
492                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
493                                 if (other->job &&
494                                     (other->job->type == JOB_START ||
495                                      other->job->type == JOB_VERIFY_ACTIVE ||
496                                      other->job->type == JOB_RELOAD_OR_START)) {
497                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
498                                         recursed = true;
499                                 }
500
501                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
502                                 if (other->job &&
503                                     (other->job->type == JOB_START ||
504                                      other->job->type == JOB_VERIFY_ACTIVE ||
505                                      other->job->type == JOB_RELOAD_OR_START)) {
506                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
507                                         recursed = true;
508                                 }
509
510                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
511                                 if (other->job &&
512                                     !other->job->override &&
513                                     (other->job->type == JOB_START ||
514                                      other->job->type == JOB_VERIFY_ACTIVE ||
515                                      other->job->type == JOB_RELOAD_OR_START)) {
516                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
517                                         recursed = true;
518                                 }
519
520                 } else if (t == JOB_STOP) {
521
522                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
523                                 if (other->job &&
524                                     (other->job->type == JOB_START ||
525                                      other->job->type == JOB_VERIFY_ACTIVE ||
526                                      other->job->type == JOB_RELOAD_OR_START)) {
527                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
528                                         recursed = true;
529                                 }
530                 }
531         }
532
533         /* Trigger OnFailure dependencies that are not generated by
534          * the unit itself. We don't tread JOB_CANCELED as failure in
535          * this context. And JOB_FAILURE is already handled by the
536          * unit itself. */
537         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
538                 log_notice("Job %s/%s failed with result '%s'.",
539                            u->id,
540                            job_type_to_string(t),
541                            job_result_to_string(result));
542
543                 unit_trigger_on_failure(u);
544         }
545
546 finish:
547         /* Try to start the next jobs that can be started */
548         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
549                 if (other->job)
550                         job_add_to_run_queue(other->job);
551         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
552                 if (other->job)
553                         job_add_to_run_queue(other->job);
554
555         manager_check_finished(u->manager);
556
557         return recursed;
558 }
559
560 int job_start_timer(Job *j) {
561         struct itimerspec its;
562         struct epoll_event ev;
563         int fd, r;
564         assert(j);
565
566         if (j->unit->job_timeout <= 0 ||
567             j->timer_watch.type == WATCH_JOB_TIMER)
568                 return 0;
569
570         assert(j->timer_watch.type == WATCH_INVALID);
571
572         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
573                 r = -errno;
574                 goto fail;
575         }
576
577         zero(its);
578         timespec_store(&its.it_value, j->unit->job_timeout);
579
580         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
581                 r = -errno;
582                 goto fail;
583         }
584
585         zero(ev);
586         ev.data.ptr = &j->timer_watch;
587         ev.events = EPOLLIN;
588
589         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
590                 r = -errno;
591                 goto fail;
592         }
593
594         j->timer_watch.type = WATCH_JOB_TIMER;
595         j->timer_watch.fd = fd;
596         j->timer_watch.data.job = j;
597
598         return 0;
599
600 fail:
601         if (fd >= 0)
602                 close_nointr_nofail(fd);
603
604         return r;
605 }
606
607 void job_add_to_run_queue(Job *j) {
608         assert(j);
609         assert(j->installed);
610
611         if (j->in_run_queue)
612                 return;
613
614         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
615         j->in_run_queue = true;
616 }
617
618 void job_add_to_dbus_queue(Job *j) {
619         assert(j);
620         assert(j->installed);
621
622         if (j->in_dbus_queue)
623                 return;
624
625         /* We don't check if anybody is subscribed here, since this
626          * job might just have been created and not yet assigned to a
627          * connection/client. */
628
629         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
630         j->in_dbus_queue = true;
631 }
632
633 char *job_dbus_path(Job *j) {
634         char *p;
635
636         assert(j);
637
638         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
639                 return NULL;
640
641         return p;
642 }
643
644 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
645         assert(j);
646         assert(w == &j->timer_watch);
647
648         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
649         job_finish_and_invalidate(j, JOB_TIMEOUT);
650 }
651
652 static const char* const job_state_table[_JOB_STATE_MAX] = {
653         [JOB_WAITING] = "waiting",
654         [JOB_RUNNING] = "running"
655 };
656
657 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
658
659 static const char* const job_type_table[_JOB_TYPE_MAX] = {
660         [JOB_START] = "start",
661         [JOB_VERIFY_ACTIVE] = "verify-active",
662         [JOB_STOP] = "stop",
663         [JOB_RELOAD] = "reload",
664         [JOB_RELOAD_OR_START] = "reload-or-start",
665         [JOB_RESTART] = "restart",
666         [JOB_TRY_RESTART] = "try-restart",
667 };
668
669 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
670
671 static const char* const job_mode_table[_JOB_MODE_MAX] = {
672         [JOB_FAIL] = "fail",
673         [JOB_REPLACE] = "replace",
674         [JOB_ISOLATE] = "isolate",
675         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
676         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
677 };
678
679 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
680
681 static const char* const job_result_table[_JOB_RESULT_MAX] = {
682         [JOB_DONE] = "done",
683         [JOB_CANCELED] = "canceled",
684         [JOB_TIMEOUT] = "timeout",
685         [JOB_FAILED] = "failed",
686         [JOB_DEPENDENCY] = "dependency",
687         [JOB_SKIPPED] = "skipped"
688 };
689
690 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);