chiark / gitweb /
job: timeout every job independently of the unit
author Lennart Poettering <lennart@poettering.net>
Sat, 17 Jul 2010 02:09:28 +0000 (04:09 +0200)
committer Lennart Poettering <lennart@poettering.net>
Sat, 17 Jul 2010 02:09:28 +0000 (04:09 +0200)
fixme
src/dbus-unit.h
src/device.c
src/job.c
src/job.h
src/load-fragment.c
src/manager.c
src/manager.h
src/unit.c
src/unit.h

diff --git a/fixme b/fixme
index 8e769a28f12f685a0ddd37e6021fee7a0cecce8d..10573058bb9a827e51831a24ea3a874496873076 100644 (file)
--- a/fixme
+++ b/fixme
@@ -35,8 +35,6 @@
 
 * systemctl status $PID, systemctl stop $PID!
 
-* timeout waiting for mount devices?
-
 * place /etc/inittab with explaining blurb.
 
 * In command lines, support both "$FOO" and $FOO
diff --git a/src/dbus-unit.h b/src/dbus-unit.h
index e93d65892a5793362ea7fe1674e149b6e67663f0..0b9c62f86ca3fef1e8ec9c0c71c8592f9e74f40d 100644 (file)
--- a/src/dbus-unit.h
+++ b/src/dbus-unit.h
@@ -90,6 +90,7 @@
         "  <property name=\"DefaultControlGroup\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"ControlGroups\" type=\"as\" access=\"read\"/>\n" \
         "  <property name=\"NeedDaemonReload\" type=\"b\" access=\"read\"/>\n" \
+        "  <property name=\"JobTimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
         " </interface>\n"
 
 #define BUS_UNIT_PROPERTIES \
         { "org.freedesktop.systemd1.Unit", "DefaultDependencies",  bus_property_append_bool,       "b",    &u->meta.default_dependencies     }, \
         { "org.freedesktop.systemd1.Unit", "DefaultControlGroup",  bus_unit_append_default_cgroup, "s",    u                                 }, \
         { "org.freedesktop.systemd1.Unit", "ControlGroups",        bus_unit_append_cgroups,        "as",   u                                 }, \
-        { "org.freedesktop.systemd1.Unit", "NeedDaemonReload",     bus_unit_append_need_daemon_reload, "b", u                                }
+        { "org.freedesktop.systemd1.Unit", "NeedDaemonReload",     bus_unit_append_need_daemon_reload, "b", u                                }, \
+        { "org.freedesktop.systemd1.Unit", "JobTimeoutUSec",       bus_property_append_usec,       "t",    &u->meta.job_timeout              }
 
 int bus_unit_append_names(Manager *m, DBusMessageIter *i, const char *property, void *data);
 int bus_unit_append_dependencies(Manager *m, DBusMessageIter *i, const char *property, void *data);
diff --git a/src/device.c b/src/device.c
index dc626d8189777f035c31212f1f68237de19d755d..39ab29110374058a125e94d064f7991219cf91d0 100644 (file)
--- a/src/device.c
+++ b/src/device.c
@@ -35,6 +35,15 @@ static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
         [DEVICE_PLUGGED] = UNIT_ACTIVE
 };
 
+static void device_init(Unit *u) {
+        Device *d = DEVICE(u);
+
+        assert(d);
+        assert(d->meta.load_state == UNIT_STUB);
+
+        d->meta.job_timeout = DEFAULT_TIMEOUT_USEC;
+}
+
 static void device_done(Unit *u) {
         Device *d = DEVICE(u);
 
@@ -456,6 +465,8 @@ const UnitVTable device_vtable = {
         .no_snapshots = true,
         .no_isolate = true,
 
+        .init = device_init,
+
         .load = unit_load_fragment_and_dropin_optional,
         .done = device_done,
         .coldplug = device_coldplug,
diff --git a/src/job.c b/src/job.c
index 8cc9d742ede5dcba0bcee7da0c6b1e801c76b5ee..2b422b48773918235fee6e9f0a138099c9bc338b 100644 (file)
--- a/src/job.c
+++ b/src/job.c
@@ -21,6 +21,8 @@
 
 #include <assert.h>
 #include <errno.h>
+#include <sys/timerfd.h>
+#include <sys/epoll.h>
 
 #include "set.h"
 #include "unit.h"
@@ -46,6 +48,8 @@ Job* job_new(Manager *m, JobType type, Unit *unit) {
         j->type = type;
         j->unit = unit;
 
+        j->timer_watch.type = WATCH_INVALID;
+
         /* We don't link it here, that's what job_dependency() is for */
 
         return j;
@@ -76,6 +80,15 @@ void job_free(Job *j) {
         if (j->in_dbus_queue)
                 LIST_REMOVE(Job, dbus_queue, j->manager->dbus_job_queue, j);
 
+        if (j->timer_watch.type != WATCH_INVALID) {
+                assert(j->timer_watch.type == WATCH_JOB_TIMER);
+                assert(j->timer_watch.data.job == j);
+                assert(j->timer_watch.fd >= 0);
+
+                assert_se(epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_DEL, j->timer_watch.fd, NULL) >= 0);
+                close_nointr_nofail(j->timer_watch.fd);
+        }
+
         free(j->bus_client);
         free(j);
 }
@@ -472,8 +485,6 @@ int job_finish_and_invalidate(Job *j, bool success) {
 
                 j->state = JOB_WAITING;
                 j->type = JOB_START;
-
-                job_add_to_run_queue(j);
                 return 0;
         }
 
@@ -534,6 +545,53 @@ int job_finish_and_invalidate(Job *j, bool success) {
         return 0;
 }
 
+int job_start_timer(Job *j) {
+        struct itimerspec its;
+        struct epoll_event ev;
+        int fd, r;
+        assert(j);
+
+        if (j->unit->meta.job_timeout <= 0 ||
+            j->timer_watch.type == WATCH_JOB_TIMER)
+                return 0;
+
+        assert(j->timer_watch.type == WATCH_INVALID);
+
+        if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        zero(its);
+        timespec_store(&its.it_value, j->unit->meta.job_timeout);
+
+        if (timerfd_settime(fd, 0, &its, NULL) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        zero(ev);
+        ev.data.ptr = &j->timer_watch;
+        ev.events = EPOLLIN;
+
+        if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        j->timer_watch.type = WATCH_JOB_TIMER;
+        j->timer_watch.fd = fd;
+        j->timer_watch.data.job = j;
+
+        return 0;
+
+fail:
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        return r;
+}
+
 void job_add_to_run_queue(Job *j) {
         assert(j);
         assert(j->installed);
@@ -571,6 +629,14 @@ char *job_dbus_path(Job *j) {
         return p;
 }
 
+void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) {
+        assert(j);
+        assert(w == &j->timer_watch);
+
+        log_warning("Job %s/%s timed out.", j->unit->meta.id, job_type_to_string(j->type));
+        job_finish_and_invalidate(j, false);
+}
+
 static const char* const job_state_table[_JOB_STATE_MAX] = {
         [JOB_WAITING] = "waiting",
         [JOB_RUNNING] = "running"
diff --git a/src/job.h b/src/job.h
index 9c685f1dad9c6f0d532b308cc2d23269b17eaadc..41d697e842b04778ba1f2128f8d01387da17a7fc 100644 (file)
--- a/src/job.h
+++ b/src/job.h
@@ -102,6 +102,8 @@ struct Job {
         JobType type;
         JobState state;
 
+        Watch timer_watch;
+
         /* Note that this bus object is not ref counted here. */
         DBusConnection *bus;
         char *bus_client;
@@ -138,9 +140,14 @@ bool job_is_runnable(Job *j);
 void job_add_to_run_queue(Job *j);
 void job_add_to_dbus_queue(Job *j);
 
+int job_start_timer(Job *j);
+void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w);
+
 int job_run_and_invalidate(Job *j);
 int job_finish_and_invalidate(Job *j, bool success);
 
+char *job_dbus_path(Job *j);
+
 const char* job_type_to_string(JobType t);
 JobType job_type_from_string(const char *s);
 
@@ -150,6 +157,4 @@ JobState job_state_from_string(const char *s);
 const char* job_mode_to_string(JobMode t);
 JobMode job_mode_from_string(const char *s);
 
-char *job_dbus_path(Job *j);
-
 #endif
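diff --git a/src/load-fragment.c b/src/load-fragment.c
index a2974cbeaded5bb595c610c16fdbd259276f1823..50008101a1d2e81c2d594166d41df27ef8cad4a5 100644 (file)
--- a/src/load-fragment.c
+++ b/src/load-fragment.c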
@@ -1565,6 +1565,7 @@ static int load_from_path(Unit *u, const char *path) {
                 { "OnlyByDependency",       config_parse_bool,            &u->meta.only_by_dependency,                     "Unit"    },
                 { "DefaultDependencies",    config_parse_bool,            &u->meta.default_dependencies,                   "Unit"    },
                 { "IgnoreDependencyFailure",config_parse_bool,            &u->meta.ignore_dependency_failure,              "Unit"    },
+                { "JobTimeoutSec",          config_parse_usec,            &u->meta.job_timeout,                            "Unit"    },
 
                 { "PIDFile",                config_parse_path,            &u->service.pid_file,                            "Service" },
                 { "ExecStartPre",           config_parse_exec,            u->service.exec_command+SERVICE_EXEC_START_PRE,  "Service" },
diff --git a/src/manager.c b/src/manager.c
index 58848356ea9aec577068650514cfe5b4df30b766..7b2586fb0c0a9d3b7b16a21efabbec857e3d6f69 100644 (file)
--- a/src/manager.c
+++ b/src/manager.c
@@ -1124,6 +1124,7 @@ static int transaction_apply(Manager *m) {
 
                 job_add_to_run_queue(j);
                 job_add_to_dbus_queue(j);
+                job_start_timer(j);
         }
 
         /* As last step, kill all remaining job dependencies. */
@@ -2022,7 +2023,8 @@ static int process_event(Manager *m, struct epoll_event *ev) {
                 UNIT_VTABLE(w->data.unit)->fd_event(w->data.unit, w->fd, ev->events, w);
                 break;
 
-        case WATCH_TIMER: {
+        case WATCH_UNIT_TIMER:
+        case WATCH_JOB_TIMER: {
                 uint64_t v;
                 ssize_t k;
 
@@ -2035,7 +2037,10 @@ static int process_event(Manager *m, struct epoll_event *ev) {
                         return k < 0 ? -errno : -EIO;
                 }
 
-                UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
+                if (w->type == WATCH_UNIT_TIMER)
+                        UNIT_VTABLE(w->data.unit)->timer_event(w->data.unit, v, w);
+                else
+                        job_timer_event(w->data.job, v, w);
                 break;
         }
 
diff --git a/src/manager.h b/src/manager.h
index 32fbacc4615e1a1094b27a29b63e9c1b2f8e4cc2..c492ffa55f179a171dad2e430144081269fdc7a5 100644 (file)
--- a/src/manager.h
+++ b/src/manager.h
@@ -57,7 +57,8 @@ enum WatchType {
         WATCH_SIGNAL,
         WATCH_NOTIFY,
         WATCH_FD,
-        WATCH_TIMER,
+        WATCH_UNIT_TIMER,
+        WATCH_JOB_TIMER,
         WATCH_MOUNT,
         WATCH_UDEV,
         WATCH_DBUS_WATCH,
@@ -69,6 +70,7 @@ struct Watch {
         WatchType type;
         union {
                 union Unit *unit;
+                struct Job *job;
                 DBusWatch *bus_watch;
                 DBusTimeout *bus_timeout;
         } data;
diff --git a/src/unit.c b/src/unit.c
index 44dc8116306bb396326ab766bfbe78d71ac3d937..348d139586950637438d9ff58c5eb1a3b1af900d 100644 (file)
--- a/src/unit.c
+++ b/src/unit.c
@@ -587,7 +587,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 timestamp1[FORMAT_TIMESTAMP_MAX],
                 timestamp2[FORMAT_TIMESTAMP_MAX],
                 timestamp3[FORMAT_TIMESTAMP_MAX],
-                timestamp4[FORMAT_TIMESTAMP_MAX];
+                timestamp4[FORMAT_TIMESTAMP_MAX],
+                timespan[FORMAT_TIMESPAN_MAX];
 
         assert(u);
         assert(u->meta.type >= 0);
@@ -627,6 +628,9 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         if (u->meta.fragment_path)
                 fprintf(f, "%s\tFragment Path: %s\n", prefix, u->meta.fragment_path);
 
+        if (u->meta.job_timeout > 0)
+                fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->meta.job_timeout));
+
         for (d = 0; d < _UNIT_DEPENDENCY_MAX; d++) {
                 Unit *other;
 
@@ -1003,7 +1007,6 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns) {
                          * failed previously due to EAGAIN. */
                         job_add_to_run_queue(u->meta.job);
 
-
                 /* Let's check whether this state change constitutes a
                  * finished job, or maybe cotradicts a running job and
                  * hence needs to invalidate jobs. */
@@ -1189,18 +1192,23 @@ int unit_watch_timer(Unit *u, usec_t delay, Watch *w) {
 
         assert(u);
         assert(w);
-        assert(w->type == WATCH_INVALID || (w->type == WATCH_TIMER && w->data.unit == u));
+        assert(w->type == WATCH_INVALID || (w->type == WATCH_UNIT_TIMER && w->data.unit == u));
 
         /* This will try to reuse the old timer if there is one */
 
-        if (w->type == WATCH_TIMER) {
+        if (w->type == WATCH_UNIT_TIMER) {
+                assert(w->data.unit == u);
+                assert(w->fd >= 0);
+
                 ours = false;
                 fd = w->fd;
-        } else {
+        } else if (w->type == WATCH_INVALID) {
+
                 ours = true;
                 if ((fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC)) < 0)
                         return -errno;
-        }
+        } else
+                assert_not_reached("Invalid watch type");
 
         zero(its);
 
@@ -1231,8 +1239,8 @@ int unit_watch_timer(Unit *u, usec_t delay, Watch *w) {
                         goto fail;
         }
 
+        w->type = WATCH_UNIT_TIMER;
         w->fd = fd;
-        w->type = WATCH_TIMER;
         w->data.unit = u;
 
         return 0;
@@ -1251,7 +1259,9 @@ void unit_unwatch_timer(Unit *u, Watch *w) {
         if (w->type == WATCH_INVALID)
                 return;
 
-        assert(w->type == WATCH_TIMER && w->data.unit == u);
+        assert(w->type == WATCH_UNIT_TIMER);
+        assert(w->data.unit == u);
+        assert(w->fd >= 0);
 
         assert_se(epoll_ctl(u->meta.manager->epoll_fd, EPOLL_CTL_DEL, w->fd, NULL) >= 0);
         close_nointr_nofail(w->fd);
diff --git a/src/unit.h b/src/unit.h
index 55fe0fa60ec8818ce53fd5e964c3c95bf98e0202..cfad3ca53ba268883ffa3176c81c0cfea52dd13a 100644 (file)
--- a/src/unit.h
+++ b/src/unit.h
@@ -143,6 +143,7 @@ struct Meta {
         Set *dependencies[_UNIT_DEPENDENCY_MAX];
 
         char *description;
+
         char *fragment_path; /* if loaded from a config file this is the primary path to it */
         usec_t fragment_mtime;
 
@@ -150,6 +151,8 @@ struct Meta {
          * the job for it */
         Job *job;
 
+        usec_t job_timeout;
+
         dual_timestamp inactive_exit_timestamp;
         dual_timestamp active_enter_timestamp;
         dual_timestamp active_exit_timestamp;