chiark / gitweb /
18ec823ebede3d0f9db04d2c0cd55609217a340f
[elogind.git] / src / core / job.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <sys/timerfd.h>
25 #include <sys/epoll.h>
26
27 #include "set.h"
28 #include "unit.h"
29 #include "macro.h"
30 #include "strv.h"
31 #include "load-fragment.h"
32 #include "load-dropin.h"
33 #include "log.h"
34 #include "dbus-job.h"
35
36 Job* job_new(Unit *unit, JobType type) {
37         Job *j;
38
39         assert(type < _JOB_TYPE_MAX);
40         assert(unit);
41
42         if (!(j = new0(Job, 1)))
43                 return NULL;
44
45         j->manager = unit->manager;
46         j->id = j->manager->current_job_id++;
47         j->type = type;
48         j->unit = unit;
49
50         j->timer_watch.type = WATCH_INVALID;
51
52         /* We don't link it here, that's what job_dependency() is for */
53
54         return j;
55 }
56
57 void job_uninstall(Job *j) {
58         assert(j->installed);
59         /* Detach from next 'bigger' objects */
60
61         bus_job_send_removed_signal(j);
62
63         if (j->unit->job == j) {
64                 j->unit->job = NULL;
65                 unit_add_to_gc_queue(j->unit);
66         }
67
68         hashmap_remove(j->manager->jobs, UINT32_TO_PTR(j->id));
69         j->installed = false;
70 }
71
72 void job_free(Job *j) {
73         assert(j);
74         assert(!j->installed);
75         assert(!j->transaction_prev);
76         assert(!j->transaction_next);
77         assert(!j->subject_list);
78         assert(!j->object_list);
79
80         if (j->in_run_queue)
81                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
82
83         if (j->in_dbus_queue)
84                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
85
86         if (j->timer_watch.type != WATCH_INVALID) {
87                 assert(j->timer_watch.type == WATCH_JOB_TIMER);
88                 assert(j->timer_watch.data.job == j);
89                 assert(j->timer_watch.fd >= 0);
90
91                 assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
92                 close_nointr_nofail(j->timer_watch.fd);
93         }
94
95         free(j->bus_client);
96         free(j);
97 }
98
99 JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
100         JobDependency *l;
101
102         assert(object);
103
104         /* Adds a new job link, which encodes that the 'subject' job
105          * needs the 'object' job in some way. If 'subject' is NULL
106          * this means the 'anchor' job (i.e. the one the user
107          * explicitly asked for) is the requester. */
108
109         if (!(l = new0(JobDependency, 1)))
110                 return NULL;
111
112         l->subject = subject;
113         l->object = object;
114         l->matters = matters;
115         l->conflicts = conflicts;
116
117         if (subject)
118                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
119
120         LIST_PREPEND(JobDependency, object, object->object_list, l);
121
122         return l;
123 }
124
125 void job_dependency_free(JobDependency *l) {
126         assert(l);
127
128         if (l->subject)
129                 LIST_REMOVE(JobDependency, subject, l->subject->subject_list, l);
130
131         LIST_REMOVE(JobDependency, object, l->object->object_list, l);
132
133         free(l);
134 }
135
136 void job_dump(Job *j, FILE*f, const char *prefix) {
137         assert(j);
138         assert(f);
139
140         if (!prefix)
141                 prefix = "";
142
143         fprintf(f,
144                 "%s-> Job %u:\n"
145                 "%s\tAction: %s -> %s\n"
146                 "%s\tState: %s\n"
147                 "%s\tForced: %s\n",
148                 prefix, j->id,
149                 prefix, j->unit->id, job_type_to_string(j->type),
150                 prefix, job_state_to_string(j->state),
151                 prefix, yes_no(j->override));
152 }
153
154 bool job_is_anchor(Job *j) {
155         JobDependency *l;
156
157         assert(j);
158
159         LIST_FOREACH(object, l, j->object_list)
160                 if (!l->subject)
161                         return true;
162
163         return false;
164 }
165
166 /*
167  * Merging is commutative, so imagine the matrix as symmetric. We store only
168  * its lower triangle to avoid duplication. We don't store the main diagonal,
169  * because A merged with A is simply A.
170  *
171  * Merging is associative! A merged with B merged with C is the same as
172  * A merged with C merged with B.
173  *
174  * Mergeability is transitive! If A can be merged with B and B with C then
175  * A also with C.
176  *
177  * Also, if A merged with B cannot be merged with C, then either A or B cannot
178  * be merged with C either.
179  */
180 static const JobType job_merging_table[] = {
181 /* What \ With       *  JOB_START         JOB_VERIFY_ACTIVE  JOB_STOP JOB_RELOAD   JOB_RELOAD_OR_START  JOB_RESTART JOB_TRY_RESTART */
182 /************************************************************************************************************************************/
183 /*JOB_START          */
184 /*JOB_VERIFY_ACTIVE  */ JOB_START,
185 /*JOB_STOP           */ -1,                  -1,
186 /*JOB_RELOAD         */ JOB_RELOAD_OR_START, JOB_RELOAD,          -1,
187 /*JOB_RELOAD_OR_START*/ JOB_RELOAD_OR_START, JOB_RELOAD_OR_START, -1, JOB_RELOAD_OR_START,
188 /*JOB_RESTART        */ JOB_RESTART,         JOB_RESTART,         -1, JOB_RESTART,         JOB_RESTART,
189 /*JOB_TRY_RESTART    */ JOB_RESTART,         JOB_TRY_RESTART,     -1, JOB_TRY_RESTART,     JOB_RESTART, JOB_RESTART,
190 };
191
192 JobType job_type_lookup_merge(JobType a, JobType b) {
193         assert_cc(ELEMENTSOF(job_merging_table) == _JOB_TYPE_MAX * (_JOB_TYPE_MAX - 1) / 2);
194         assert(a >= 0 && a < _JOB_TYPE_MAX);
195         assert(b >= 0 && b < _JOB_TYPE_MAX);
196
197         if (a == b)
198                 return a;
199
200         if (a < b) {
201                 JobType tmp = a;
202                 a = b;
203                 b = tmp;
204         }
205
206         return job_merging_table[(a - 1) * a / 2 + b];
207 }
208
209 bool job_type_is_redundant(JobType a, UnitActiveState b) {
210         switch (a) {
211
212         case JOB_START:
213                 return
214                         b == UNIT_ACTIVE ||
215                         b == UNIT_RELOADING;
216
217         case JOB_STOP:
218                 return
219                         b == UNIT_INACTIVE ||
220                         b == UNIT_FAILED;
221
222         case JOB_VERIFY_ACTIVE:
223                 return
224                         b == UNIT_ACTIVE ||
225                         b == UNIT_RELOADING;
226
227         case JOB_RELOAD:
228                 return
229                         b == UNIT_RELOADING;
230
231         case JOB_RELOAD_OR_START:
232                 return
233                         b == UNIT_ACTIVATING ||
234                         b == UNIT_RELOADING;
235
236         case JOB_RESTART:
237                 return
238                         b == UNIT_ACTIVATING;
239
240         case JOB_TRY_RESTART:
241                 return
242                         b == UNIT_ACTIVATING;
243
244         default:
245                 assert_not_reached("Invalid job type");
246         }
247 }
248
249 bool job_is_runnable(Job *j) {
250         Iterator i;
251         Unit *other;
252
253         assert(j);
254         assert(j->installed);
255
256         /* Checks whether there is any job running for the units this
257          * job needs to be running after (in the case of a 'positive'
258          * job type) or before (in the case of a 'negative' job
259          * type. */
260
261         /* First check if there is an override */
262         if (j->ignore_order)
263                 return true;
264
265         if (j->type == JOB_START ||
266             j->type == JOB_VERIFY_ACTIVE ||
267             j->type == JOB_RELOAD ||
268             j->type == JOB_RELOAD_OR_START) {
269
270                 /* Immediate result is that the job is or might be
271                  * started. In this case lets wait for the
272                  * dependencies, regardless whether they are
273                  * starting or stopping something. */
274
275                 SET_FOREACH(other, j->unit->dependencies[UNIT_AFTER], i)
276                         if (other->job)
277                                 return false;
278         }
279
280         /* Also, if something else is being stopped and we should
281          * change state after it, then lets wait. */
282
283         SET_FOREACH(other, j->unit->dependencies[UNIT_BEFORE], i)
284                 if (other->job &&
285                     (other->job->type == JOB_STOP ||
286                      other->job->type == JOB_RESTART ||
287                      other->job->type == JOB_TRY_RESTART))
288                         return false;
289
290         /* This means that for a service a and a service b where b
291          * shall be started after a:
292          *
293          *  start a + start b â†’ 1st step start a, 2nd step start b
294          *  start a + stop b  â†’ 1st step stop b,  2nd step start a
295          *  stop a  + start b â†’ 1st step stop a,  2nd step start b
296          *  stop a  + stop b  â†’ 1st step stop b,  2nd step stop a
297          *
298          *  This has the side effect that restarts are properly
299          *  synchronized too. */
300
301         return true;
302 }
303
304 static void job_change_type(Job *j, JobType newtype) {
305         log_debug("Converting job %s/%s -> %s/%s",
306                   j->unit->id, job_type_to_string(j->type),
307                   j->unit->id, job_type_to_string(newtype));
308
309         j->type = newtype;
310 }
311
312 int job_run_and_invalidate(Job *j) {
313         int r;
314         uint32_t id;
315         Manager *m;
316
317         assert(j);
318         assert(j->installed);
319
320         if (j->in_run_queue) {
321                 LIST_REMOVE(Job, run_queue, j->manager->run_queue, j);
322                 j->in_run_queue = false;
323         }
324
325         if (j->state != JOB_WAITING)
326                 return 0;
327
328         if (!job_is_runnable(j))
329                 return -EAGAIN;
330
331         j->state = JOB_RUNNING;
332         job_add_to_dbus_queue(j);
333
334         /* While we execute this operation the job might go away (for
335          * example: because it is replaced by a new, conflicting
336          * job.) To make sure we don't access a freed job later on we
337          * store the id here, so that we can verify the job is still
338          * valid. */
339         id = j->id;
340         m = j->manager;
341
342         switch (j->type) {
343
344                 case JOB_RELOAD_OR_START:
345                         if (unit_active_state(j->unit) == UNIT_ACTIVE) {
346                                 job_change_type(j, JOB_RELOAD);
347                                 r = unit_reload(j->unit);
348                                 break;
349                         }
350                         job_change_type(j, JOB_START);
351                         /* fall through */
352
353                 case JOB_START:
354                         r = unit_start(j->unit);
355
356                         /* If this unit cannot be started, then simply wait */
357                         if (r == -EBADR)
358                                 r = 0;
359                         break;
360
361                 case JOB_VERIFY_ACTIVE: {
362                         UnitActiveState t = unit_active_state(j->unit);
363                         if (UNIT_IS_ACTIVE_OR_RELOADING(t))
364                                 r = -EALREADY;
365                         else if (t == UNIT_ACTIVATING)
366                                 r = -EAGAIN;
367                         else
368                                 r = -ENOEXEC;
369                         break;
370                 }
371
372                 case JOB_TRY_RESTART:
373                         if (UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(j->unit))) {
374                                 r = -ENOEXEC;
375                                 break;
376                         }
377                         job_change_type(j, JOB_RESTART);
378                         /* fall through */
379
380                 case JOB_STOP:
381                 case JOB_RESTART:
382                         r = unit_stop(j->unit);
383
384                         /* If this unit cannot stopped, then simply wait. */
385                         if (r == -EBADR)
386                                 r = 0;
387                         break;
388
389                 case JOB_RELOAD:
390                         r = unit_reload(j->unit);
391                         break;
392
393                 default:
394                         assert_not_reached("Unknown job type");
395         }
396
397         if ((j = manager_get_job(m, id))) {
398                 if (r == -EALREADY)
399                         r = job_finish_and_invalidate(j, JOB_DONE);
400                 else if (r == -ENOEXEC)
401                         r = job_finish_and_invalidate(j, JOB_SKIPPED);
402                 else if (r == -EAGAIN)
403                         j->state = JOB_WAITING;
404                 else if (r < 0)
405                         r = job_finish_and_invalidate(j, JOB_FAILED);
406         }
407
408         return r;
409 }
410
411 static void job_print_status_message(Unit *u, JobType t, JobResult result) {
412         assert(u);
413
414         if (t == JOB_START) {
415
416                 switch (result) {
417
418                 case JOB_DONE:
419                         if (u->condition_result)
420                                 unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Started %s", unit_description(u));
421                         break;
422
423                 case JOB_FAILED:
424                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON "FAILED" ANSI_HIGHLIGHT_OFF, "Failed to start %s", unit_description(u));
425                         unit_status_printf(u, NULL, "See 'systemctl status %s' for details.", u->id);
426                         break;
427
428                 case JOB_DEPENDENCY:
429                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " ABORT" ANSI_HIGHLIGHT_OFF, "Dependency failed. Aborted start of %s", unit_description(u));
430                         break;
431
432                 case JOB_TIMEOUT:
433                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out starting %s", unit_description(u));
434                         break;
435
436                 default:
437                         ;
438                 }
439
440         } else if (t == JOB_STOP) {
441
442                 switch (result) {
443
444                 case JOB_TIMEOUT:
445                         unit_status_printf(u, ANSI_HIGHLIGHT_RED_ON " TIME " ANSI_HIGHLIGHT_OFF, "Timed out stopping %s", unit_description(u));
446                         break;
447
448                 case JOB_DONE:
449                 case JOB_FAILED:
450                         unit_status_printf(u, ANSI_HIGHLIGHT_GREEN_ON "  OK  " ANSI_HIGHLIGHT_OFF, "Stopped %s", unit_description(u));
451                         break;
452
453                 default:
454                         ;
455                 }
456         }
457 }
458
459 int job_finish_and_invalidate(Job *j, JobResult result) {
460         Unit *u;
461         Unit *other;
462         JobType t;
463         Iterator i;
464         bool recursed = false;
465
466         assert(j);
467         assert(j->installed);
468
469         job_add_to_dbus_queue(j);
470
471         /* Patch restart jobs so that they become normal start jobs */
472         if (result == JOB_DONE && j->type == JOB_RESTART) {
473
474                 job_change_type(j, JOB_START);
475                 j->state = JOB_WAITING;
476
477                 job_add_to_run_queue(j);
478
479                 u = j->unit;
480                 goto finish;
481         }
482
483         j->result = result;
484
485         log_debug("Job %s/%s finished, result=%s", j->unit->id, job_type_to_string(j->type), job_result_to_string(result));
486
487         if (result == JOB_FAILED)
488                 j->manager->n_failed_jobs ++;
489
490         u = j->unit;
491         t = j->type;
492         job_uninstall(j);
493         job_free(j);
494
495         job_print_status_message(u, t, result);
496
497         /* Fail depending jobs on failure */
498         if (result != JOB_DONE) {
499
500                 if (t == JOB_START ||
501                     t == JOB_VERIFY_ACTIVE ||
502                     t == JOB_RELOAD_OR_START) {
503
504                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY], i)
505                                 if (other->job &&
506                                     (other->job->type == JOB_START ||
507                                      other->job->type == JOB_VERIFY_ACTIVE ||
508                                      other->job->type == JOB_RELOAD_OR_START)) {
509                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
510                                         recursed = true;
511                                 }
512
513                         SET_FOREACH(other, u->dependencies[UNIT_BOUND_BY], i)
514                                 if (other->job &&
515                                     (other->job->type == JOB_START ||
516                                      other->job->type == JOB_VERIFY_ACTIVE ||
517                                      other->job->type == JOB_RELOAD_OR_START)) {
518                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
519                                         recursed = true;
520                                 }
521
522                         SET_FOREACH(other, u->dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
523                                 if (other->job &&
524                                     !other->job->override &&
525                                     (other->job->type == JOB_START ||
526                                      other->job->type == JOB_VERIFY_ACTIVE ||
527                                      other->job->type == JOB_RELOAD_OR_START)) {
528                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
529                                         recursed = true;
530                                 }
531
532                 } else if (t == JOB_STOP) {
533
534                         SET_FOREACH(other, u->dependencies[UNIT_CONFLICTED_BY], i)
535                                 if (other->job &&
536                                     (other->job->type == JOB_START ||
537                                      other->job->type == JOB_VERIFY_ACTIVE ||
538                                      other->job->type == JOB_RELOAD_OR_START)) {
539                                         job_finish_and_invalidate(other->job, JOB_DEPENDENCY);
540                                         recursed = true;
541                                 }
542                 }
543         }
544
545         /* Trigger OnFailure dependencies that are not generated by
546          * the unit itself. We don't tread JOB_CANCELED as failure in
547          * this context. And JOB_FAILURE is already handled by the
548          * unit itself. */
549         if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
550                 log_notice("Job %s/%s failed with result '%s'.",
551                            u->id,
552                            job_type_to_string(t),
553                            job_result_to_string(result));
554
555                 unit_trigger_on_failure(u);
556         }
557
558 finish:
559         /* Try to start the next jobs that can be started */
560         SET_FOREACH(other, u->dependencies[UNIT_AFTER], i)
561                 if (other->job)
562                         job_add_to_run_queue(other->job);
563         SET_FOREACH(other, u->dependencies[UNIT_BEFORE], i)
564                 if (other->job)
565                         job_add_to_run_queue(other->job);
566
567         manager_check_finished(u->manager);
568
569         return recursed;
570 }
571
572 int job_start_timer(Job *j) {
573         struct itimerspec its;
574         struct epoll_event ev;
575         int fd, r;
576         assert(j);
577
578         if (j->unit->job_timeout <= 0 ||
579             j->timer_watch.type == WATCH_JOB_TIMER)
580                 return 0;
581
582         assert(j->timer_watch.type == WATCH_INVALID);
583
584         if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
585                 r = -errno;
586                 goto fail;
587         }
588
589         zero(its);
590         timespec_store(&its.it_value, j->unit->job_timeout);
591
592         if (timerfd_settime(fd, 0, &its, NULL) < 0) {
593                 r = -errno;
594                 goto fail;
595         }
596
597         zero(ev);
598         ev.data.ptr = &j->timer_watch;
599         ev.events = EPOLLIN;
600
601         if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
602                 r = -errno;
603                 goto fail;
604         }
605
606         j->timer_watch.type = WATCH_JOB_TIMER;
607         j->timer_watch.fd = fd;
608         j->timer_watch.data.job = j;
609
610         return 0;
611
612 fail:
613         if (fd >= 0)
614                 close_nointr_nofail(fd);
615
616         return r;
617 }
618
619 void job_add_to_run_queue(Job *j) {
620         assert(j);
621         assert(j->installed);
622
623         if (j->in_run_queue)
624                 return;
625
626         LIST_PREPEND(Job, run_queue, j->manager->run_queue, j);
627         j->in_run_queue = true;
628 }
629
630 void job_add_to_dbus_queue(Job *j) {
631         assert(j);
632         assert(j->installed);
633
634         if (j->in_dbus_queue)
635                 return;
636
637         /* We don't check if anybody is subscribed here, since this
638          * job might just have been created and not yet assigned to a
639          * connection/client. */
640
641         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
642         j->in_dbus_queue = true;
643 }
644
645 char *job_dbus_path(Job *j) {
646         char *p;
647
648         assert(j);
649
650         if (asprintf(&p, "/org/freedesktop/systemd1/job/%lu", (unsigned long) j->id) < 0)
651                 return NULL;
652
653         return p;
654 }
655
656 void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
657         assert(j);
658         assert(w == &j->timer_watch);
659
660         log_warning("Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
661         job_finish_and_invalidate(j, JOB_TIMEOUT);
662 }
663
664 static const char* const job_state_table[_JOB_STATE_MAX] = {
665         [JOB_WAITING] = "waiting",
666         [JOB_RUNNING] = "running"
667 };
668
669 DEFINE_STRING_TABLE_LOOKUP(job_state, JobState);
670
671 static const char* const job_type_table[_JOB_TYPE_MAX] = {
672         [JOB_START] = "start",
673         [JOB_VERIFY_ACTIVE] = "verify-active",
674         [JOB_STOP] = "stop",
675         [JOB_RELOAD] = "reload",
676         [JOB_RELOAD_OR_START] = "reload-or-start",
677         [JOB_RESTART] = "restart",
678         [JOB_TRY_RESTART] = "try-restart",
679 };
680
681 DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
682
683 static const char* const job_mode_table[_JOB_MODE_MAX] = {
684         [JOB_FAIL] = "fail",
685         [JOB_REPLACE] = "replace",
686         [JOB_ISOLATE] = "isolate",
687         [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
688         [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
689 };
690
691 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
692
693 static const char* const job_result_table[_JOB_RESULT_MAX] = {
694         [JOB_DONE] = "done",
695         [JOB_CANCELED] = "canceled",
696         [JOB_TIMEOUT] = "timeout",
697         [JOB_FAILED] = "failed",
698         [JOB_DEPENDENCY] = "dependency",
699         [JOB_SKIPPED] = "skipped"
700 };
701
702 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);