chiark / gitweb /
manager: fix a crash in isolating
[elogind.git] / src / job.c
index 31e9cfe8d6f937ddf7973548c599f2c0d64283cb..20971da85294f67537e85541c346ccaa4bd5df7e 100644 (file)
--- a/src/job.c
+++ b/src/job.c
@@ -1,4 +1,4 @@
-/*-*- Mode: C; c-basic-offset: 8 -*-*/
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
 
 /***
   This file is part of systemd.
@@ -21,6 +21,8 @@
 
 #include <assert.h>
 #include <errno.h>
+#include <sys/timerfd.h>
+#include <sys/epoll.h>
 
 #include "set.h"
 #include "unit.h"
@@ -46,6 +48,8 @@ Job* job_new(Manager *m, JobType type, Unit *unit) {
         j->type = type;
         j->unit = unit;
 
+        j->timer_watch.type = WATCH_INVALID;
+
         /* We don't link it here, that's what job_dependency() is for */
 
         return j;
@@ -56,7 +60,7 @@ void job_free(Job *j) {
 
         /* Detach from next 'bigger' objects */
         if (j->installed) {
-                bus_job_send_removed_signal(j, !j->failed);
+                bus_job_send_removed_signal(j);
 
                 if (j->unit->meta.job == j) {
                         j->unit->meta.job = NULL;
@@ -76,10 +80,20 @@ void job_free(Job *j) {
         if (j->in_dbus_queue)
                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
 
+        if (j->timer_watch.type != WATCH_INVALID) {
+                assert(j->timer_watch.type == WATCH_JOB_TIMER);
+                assert(j->timer_watch.data.job == j);
+                assert(j->timer_watch.fd >= 0);
+
+                assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
+                close_nointr_nofail(j->timer_watch.fd);
+        }
+
+        free(j->bus_client);
         free(j);
 }
 
-JobDependency* job_dependency_new(Job *subject, Job *object, bool matters) {
+JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) {
         JobDependency *l;
 
         assert(object);
@@ -87,7 +101,7 @@ JobDependency* job_dependency_new(Job *subject, Job *object, bool matters) {
         /* Adds a new job link, which encodes that the 'subject' job
          * needs the 'object' job in some way. If 'subject' is NULL
          * this means the 'anchor' job (i.e. the one the user
-         * explcitily asked for) is the requester. */
+         * explicitly asked for) is the requester. */
 
         if (!(l = new0(JobDependency, 1)))
                 return NULL;
@@ -95,6 +109,7 @@ JobDependency* job_dependency_new(Job *subject, Job *object, bool matters) {
         l->subject = subject;
         l->object = object;
         l->matters = matters;
+        l->conflicts = conflicts;
 
         if (subject)
                 LIST_PREPEND(JobDependency, subject, subject->subject_list, l);
@@ -119,30 +134,6 @@ void job_dependency_free(JobDependency *l) {
         free(l);
 }
 
-void job_dependency_delete(Job *subject, Job *object, bool *matters) {
-        JobDependency *l;
-
-        assert(object);
-
-        LIST_FOREACH(object, l, object->object_list) {
-                assert(l->object == object);
-
-                if (l->subject == subject)
-                        break;
-        }
-
-        if (!l) {
-                if (matters)
-                        *matters = false;
-                return;
-        }
-
-        if (matters)
-                *matters = l->matters;
-
-        job_dependency_free(l);
-}
-
 void job_dump(Job *j, FILE*f, const char *prefix) {
         assert(j);
         assert(f);
@@ -280,7 +271,7 @@ bool job_type_is_redundant(JobType a, UnitActiveState b) {
         case JOB_STOP:
                 return
                         b == UNIT_INACTIVE ||
-                        b == UNIT_MAINTENANCE;
+                        b == UNIT_FAILED;
 
         case JOB_VERIFY_ACTIVE:
                 return
@@ -318,8 +309,12 @@ bool job_is_runnable(Job *j) {
 
         /* Checks whether there is any job running for the units this
          * job needs to be running after (in the case of a 'positive'
-         * job type) or before (in the case of a 'negative' job type
-         * . */
+         * job type) or before (in the case of a 'negative' job
+         * type). */
+
+        /* First check if there is an override */
+        if (j->ignore_order)
+                return true;
 
         if (j->type == JOB_START ||
             j->type == JOB_VERIFY_ACTIVE ||
@@ -362,6 +357,8 @@ bool job_is_runnable(Job *j) {
 
 int job_run_and_invalidate(Job *j) {
         int r;
+        uint32_t id;
+        Manager *m;
 
         assert(j);
         assert(j->installed);
@@ -380,12 +377,24 @@ int job_run_and_invalidate(Job *j) {
         j->state = JOB_RUNNING;
         job_add_to_dbus_queue(j);
 
+        /* While we execute this operation the job might go away (for
+         * example: because it is replaced by a new, conflicting
+         * job.) To make sure we don't access a freed job later on we
+         * store the id here, so that we can verify the job is still
+         * valid. */
+        id = j->id;
+        m = j->manager;
+
         switch (j->type) {
 
                 case JOB_START:
                         r = unit_start(j->unit);
+
+                        /* If this unit cannot be started, then simply
+                         * wait */
                         if (r == -EBADR)
                                 r = 0;
+
                         break;
 
                 case JOB_VERIFY_ACTIVE: {
@@ -401,6 +410,11 @@ int job_run_and_invalidate(Job *j) {
 
                 case JOB_STOP:
                         r = unit_stop(j->unit);
+
+                        /* If this unit cannot be stopped, then simply
+                         * wait. */
+                        if (r == -EBADR)
+                                r = 0;
                         break;
 
                 case JOB_RELOAD:
@@ -408,15 +422,21 @@ int job_run_and_invalidate(Job *j) {
                         break;
 
                 case JOB_RELOAD_OR_START:
-                        if (unit_active_state(j->unit) == UNIT_ACTIVE)
+                        if (unit_active_state(j->unit) == UNIT_ACTIVE) {
+                                j->type = JOB_RELOAD;
                                 r = unit_reload(j->unit);
-                        else
+                        } else {
+                                j->type = JOB_START;
                                 r = unit_start(j->unit);
+
+                                if (r == -EBADR)
+                                        r = 0;
+                        }
                         break;
 
                 case JOB_RESTART: {
                         UnitActiveState t = unit_active_state(j->unit);
-                        if (t == UNIT_INACTIVE || t == UNIT_MAINTENANCE || t == UNIT_ACTIVATING) {
+                        if (t == UNIT_INACTIVE || t == UNIT_FAILED || t == UNIT_ACTIVATING) {
                                 j->type = JOB_START;
                                 r = unit_start(j->unit);
                         } else
@@ -426,13 +446,15 @@ int job_run_and_invalidate(Job *j) {
 
                 case JOB_TRY_RESTART: {
                         UnitActiveState t = unit_active_state(j->unit);
-                        if (t == UNIT_INACTIVE || t == UNIT_MAINTENANCE || t == UNIT_DEACTIVATING)
+                        if (t == UNIT_INACTIVE || t == UNIT_FAILED || t == UNIT_DEACTIVATING)
                                 r = -ENOEXEC;
                         else if (t == UNIT_ACTIVATING) {
                                 j->type = JOB_START;
                                 r = unit_start(j->unit);
-                        } else
+                        } else {
+                                j->type = JOB_RESTART;
                                 r = unit_stop(j->unit);
+                        }
                         break;
                 }
 
@@ -440,50 +462,109 @@ int job_run_and_invalidate(Job *j) {
                         assert_not_reached("Unknown job type");
         }
 
-        if (r == -EALREADY)
-                r = job_finish_and_invalidate(j, true);
-        else if (r == -EAGAIN) {
-                j->state = JOB_WAITING;
-                return -EAGAIN;
-        } else if (r < 0)
-                r = job_finish_and_invalidate(j, false);
+        if ((j = manager_get_job(m, id))) {
+                if (r == -EALREADY)
+                        r = job_finish_and_invalidate(j, JOB_DONE);
+                else if (r == -ENOEXEC)
+                        r = job_finish_and_invalidate(j, JOB_SKIPPED);
+                else if (r == -EAGAIN)
+                        j->state = JOB_WAITING;
+                else if (r < 0)
+                        r = job_finish_and_invalidate(j, JOB_FAILED);
+        }
 
         return r;
 }
 
-int job_finish_and_invalidate(Job *j, bool success) {
+static void job_print_status_message(Unit *u, JobType t, JobResult result) {
+        assert(u);
+        /* Reports the outcome of a finished start or stop job through unit_status_printf(); any other job type or result prints nothing. */
+        if (t == JOB_START) {
+
+                switch (result) {
+
+                case JOB_DONE:
+                        unit_status_printf(u, "Started %s.\n", unit_description(u));
+                        break;
+
+                case JOB_FAILED:
+                        unit_status_printf(u, "Starting %s " ANSI_HIGHLIGHT_ON "failed" ANSI_HIGHLIGHT_OFF ", see 'systemctl status %s' for details.\n", unit_description(u), u->meta.id);
+                        break;
+
+                case JOB_DEPENDENCY:
+                        unit_status_printf(u, "Starting %s " ANSI_HIGHLIGHT_ON "aborted" ANSI_HIGHLIGHT_OFF " because a dependency failed.\n", unit_description(u));
+                        break;
+
+                case JOB_TIMEOUT:
+                        unit_status_printf(u, "Starting %s " ANSI_HIGHLIGHT_ON "timed out" ANSI_HIGHLIGHT_OFF ".\n", unit_description(u));
+                        break;
+
+                default:
+                        ; /* remaining results are not announced for start jobs */
+                }
+
+        } else if (t == JOB_STOP) {
+
+                switch (result) {
+
+                case JOB_TIMEOUT:
+                        unit_status_printf(u, "Stopping %s " ANSI_HIGHLIGHT_ON "timed out" ANSI_HIGHLIGHT_OFF ".\n", unit_description(u));
+                        break;
+                /* Both a clean and a failed stop are reported with the same "Stopped" message. */
+                case JOB_DONE:
+                case JOB_FAILED:
+                        unit_status_printf(u, "Stopped %s.\n", unit_description(u));
+                        break;
+
+                default:
+                        ; /* remaining results are not announced for stop jobs */
+                }
+        }
+}
+
+int job_finish_and_invalidate(Job *j, JobResult result) {
         Unit *u;
         Unit *other;
         JobType t;
         Iterator i;
+        bool recursed = false;
 
         assert(j);
         assert(j->installed);
 
-        log_debug("Job %s/%s finished, success=%s", j->unit->meta.id, job_type_to_string(j->type), yes_no(success));
         job_add_to_dbus_queue(j);
 
         /* Patch restart jobs so that they become normal start jobs */
-        if (success && (j->type == JOB_RESTART || j->type == JOB_TRY_RESTART)) {
+        if (result == JOB_DONE && (j->type == JOB_RESTART || j->type == JOB_TRY_RESTART)) {
 
                 log_debug("Converting job %s/%s -> %s/%s",
                           j->unit->meta.id, job_type_to_string(j->type),
                           j->unit->meta.id, job_type_to_string(JOB_START));
 
-                j->state = JOB_RUNNING;
+                j->state = JOB_WAITING;
                 j->type = JOB_START;
 
                 job_add_to_run_queue(j);
-                return 0;
+
+                u = j->unit;
+                goto finish;
         }
 
-        j->failed = !success;
+        j->result = result;
+
+        log_debug("Job %s/%s finished, result=%s", j->unit->meta.id, job_type_to_string(j->type), job_result_to_string(result));
+
+        if (result == JOB_FAILED)
+                j->manager->n_failed_jobs ++;
+
         u = j->unit;
         t = j->type;
         job_free(j);
 
+        job_print_status_message(u, t, result);
+
         /* Fail depending jobs on failure */
-        if (!success) {
+        if (result != JOB_DONE) {
 
                 if (t == JOB_START ||
                     t == JOB_VERIFY_ACTIVE ||
@@ -493,28 +574,57 @@ int job_finish_and_invalidate(Job *j, bool success) {
                                 if (other->meta.job &&
                                     (other->meta.job->type == JOB_START ||
                                      other->meta.job->type == JOB_VERIFY_ACTIVE ||
-                                     other->meta.job->type == JOB_RELOAD_OR_START))
-                                        job_finish_and_invalidate(other->meta.job, false);
+                                     other->meta.job->type == JOB_RELOAD_OR_START)) {
+                                        job_finish_and_invalidate(other->meta.job, JOB_DEPENDENCY);
+                                        recursed = true;
+                                }
+
+                        SET_FOREACH(other, u->meta.dependencies[UNIT_BOUND_BY], i)
+                                if (other->meta.job &&
+                                    (other->meta.job->type == JOB_START ||
+                                     other->meta.job->type == JOB_VERIFY_ACTIVE ||
+                                     other->meta.job->type == JOB_RELOAD_OR_START)) {
+                                        job_finish_and_invalidate(other->meta.job, JOB_DEPENDENCY);
+                                        recursed = true;
+                                }
 
                         SET_FOREACH(other, u->meta.dependencies[UNIT_REQUIRED_BY_OVERRIDABLE], i)
                                 if (other->meta.job &&
                                     !other->meta.job->override &&
                                     (other->meta.job->type == JOB_START ||
                                      other->meta.job->type == JOB_VERIFY_ACTIVE ||
-                                     other->meta.job->type == JOB_RELOAD_OR_START))
-                                        job_finish_and_invalidate(other->meta.job, false);
+                                     other->meta.job->type == JOB_RELOAD_OR_START)) {
+                                        job_finish_and_invalidate(other->meta.job, JOB_DEPENDENCY);
+                                        recursed = true;
+                                }
 
                 } else if (t == JOB_STOP) {
 
-                        SET_FOREACH(other, u->meta.dependencies[UNIT_CONFLICTS], i)
+                        SET_FOREACH(other, u->meta.dependencies[UNIT_CONFLICTED_BY], i)
                                 if (other->meta.job &&
                                     (other->meta.job->type == JOB_START ||
                                      other->meta.job->type == JOB_VERIFY_ACTIVE ||
-                                     other->meta.job->type == JOB_RELOAD_OR_START))
-                                        job_finish_and_invalidate(other->meta.job, false);
+                                     other->meta.job->type == JOB_RELOAD_OR_START)) {
+                                        job_finish_and_invalidate(other->meta.job, JOB_DEPENDENCY);
+                                        recursed = true;
+                                }
                 }
         }
 
+        /* Trigger OnFailure dependencies that are not generated by
+         * the unit itself. We don't treat JOB_CANCELED as failure in
+         * this context. And JOB_FAILED is already handled by the
+         * unit itself. */
+        if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
+                log_notice("Job %s/%s failed with result '%s'.",
+                           u->meta.id,
+                           job_type_to_string(t),
+                           job_result_to_string(result));
+
+                unit_trigger_on_failure(u);
+        }
+
+finish:
         /* Try to start the next jobs that can be started */
         SET_FOREACH(other, u->meta.dependencies[UNIT_AFTER], i)
                 if (other->meta.job)
@@ -523,7 +633,56 @@ int job_finish_and_invalidate(Job *j, bool success) {
                 if (other->meta.job)
                         job_add_to_run_queue(other->meta.job);
 
+        manager_check_finished(u->meta.manager);
+
+        return recursed;
+}
+
+int job_start_timer(Job *j) {
+        struct itimerspec its;
+        struct epoll_event ev;
+        int fd, r;
+        assert(j);
+        /* Arms a timerfd for the unit's job_timeout and adds it to the manager's epoll set. Returns 0 on success or when there is nothing to do, -errno on failure. */
+        if (j->unit->meta.job_timeout <= 0 ||
+            j->timer_watch.type == WATCH_JOB_TIMER)
+                return 0; /* no timeout configured, or the timer is already armed */
+
+        assert(j->timer_watch.type == WATCH_INVALID);
+
+        if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
+                r = -errno;
+                goto fail;
+        }
+        /* Only it_value is filled in; assuming zero() clears the struct, it_interval stays 0 and the timer expires once. */
+        zero(its);
+        timespec_store(&its.it_value, j->unit->meta.job_timeout);
+
+        if (timerfd_settime(fd, 0, &its, NULL) < 0) {
+                r = -errno;
+                goto fail;
+        }
+        /* The epoll payload points at the Watch embedded in the job, not at the job itself. */
+        zero(ev);
+        ev.data.ptr = &j->timer_watch;
+        ev.events = EPOLLIN;
+
+        if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+                r = -errno;
+                goto fail;
+        }
+        /* The watch is filled in only after registration succeeded, so the failure path leaves it as WATCH_INVALID. */
+        j->timer_watch.type = WATCH_JOB_TIMER;
+        j->timer_watch.fd = fd;
+        j->timer_watch.data.job = j;
+
         return 0;
+        /* Failure path: fd is negative if timerfd_create() itself failed; otherwise the timerfd is closed before returning r. */
+fail:
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        return r;
 }
 
 void job_add_to_run_queue(Job *j) {
@@ -544,10 +703,9 @@ void job_add_to_dbus_queue(Job *j) {
         if (j->in_dbus_queue)
                 return;
 
-        if (set_isempty(j->manager->subscribed)) {
-                j->sent_dbus_new_signal = true;
-                return;
-        }
+        /* We don't check if anybody is subscribed here, since this
+         * job might just have been created and not yet assigned to a
+         * connection/client. */
 
         LIST_PREPEND(Job, dbus_queue, j->manager->dbus_job_queue, j);
         j->in_dbus_queue = true;
@@ -564,6 +722,14 @@ char *job_dbus_path(Job *j) {
         return p;
 }
 
+void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
+        assert(j);
+        assert(w == &j->timer_watch);
+        /* Handles expiry of the job's own timer watch: logs a warning and finishes the job with JOB_TIMEOUT. n_elapsed is not used. */
+        log_warning("Job %s/%s timed out.", j->unit->meta.id, job_type_to_string(j->type));
+        job_finish_and_invalidate(j, JOB_TIMEOUT); /* frees j for this result, so j must not be used afterwards */
+}
+
 static const char* const job_state_table[_JOB_STATE_MAX] = {
         [JOB_WAITING] = "waiting",
         [JOB_RUNNING] = "running"
@@ -586,7 +752,20 @@ DEFINE_STRING_TABLE_LOOKUP(job_type, JobType);
 static const char* const job_mode_table[_JOB_MODE_MAX] = {
         [JOB_FAIL] = "fail",
         [JOB_REPLACE] = "replace",
-        [JOB_ISOLATE] = "isolate"
+        [JOB_ISOLATE] = "isolate",
+        [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies",
+        [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements"
 };
 
 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
+
+static const char* const job_result_table[_JOB_RESULT_MAX] = { /* string name for each JobResult value */
+        [JOB_DONE] = "done",
+        [JOB_CANCELED] = "canceled",
+        [JOB_TIMEOUT] = "timeout",
+        [JOB_FAILED] = "failed",
+        [JOB_DEPENDENCY] = "dependency",
+        [JOB_SKIPPED] = "skipped"
+};
+/* Presumably expands to the lookup helpers for this table, including the job_result_to_string() used in this file; the macro is defined elsewhere. */
+DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);