chiark / gitweb /
job: optionally, when a job timeout is hit, also execute a failure action
author Lennart Poettering <lennart@poettering.net>
Tue, 28 Oct 2014 00:49:07 +0000 (01:49 +0100)
committer Lennart Poettering <lennart@poettering.net>
Tue, 28 Oct 2014 01:19:55 +0000 (02:19 +0100)
man/systemd.unit.xml
src/core/dbus-unit.c
src/core/job.c
src/core/load-fragment-gperf.gperf.m4
src/core/unit.c
src/core/unit.h

index 88c9d7f8e893e88ac83113c01c9217786f5b847f..803eff24aa691e918e0e74939ac758222d286ca3 100644 (file)
 
                         <varlistentry>
                                 <term><varname>JobTimeoutSec=</varname></term>
-
-                                <listitem><para>When clients are
-                                waiting for a job of this unit to
-                                complete, time out after the specified
-                                time. If this time limit is reached,
-                                the job will be cancelled, the unit
-                                however will not change state or even
-                                enter the <literal>failed</literal>
-                                mode. This value defaults to 0 (job
-                                timeouts disabled), except for device
+                                <term><varname>JobTimeoutAction=</varname></term>
+                                <term><varname>JobTimeoutRebootArgument=</varname></term>
+
+                                <listitem><para>When a job for this
+                                unit is queued, a time-out may be
+                                configured. If this time limit is
+                                reached, the job will be cancelled,
+                                the unit however will not change state
+                                or even enter the
+                                <literal>failed</literal> mode. This
+                                value defaults to 0 (job timeouts
+                                disabled), except for device
                                 units. NB: this timeout is independent
                                 from any unit-specific timeout (for
                                 example, the timeout set with
-                                <varname>Timeout=</varname> in service
+                                <varname>TimeoutStartSec=</varname> in service
                                 units) as the job timeout has no
                                 effect on the unit itself, only on the
                                 job that might be pending for it. Or
                                 timeout set with this option however
                                 is useful to abort only the job
                                 waiting for the unit state to
-                                change.</para></listitem>
+                                change.</para>
+
+                                <para><varname>JobTimeoutAction=</varname>
+                                optionally configures an additional
+                                action to take when the time-out is
+                                hit. It takes the same values as the
+                                per-service
+                                <varname>StartLimitAction=</varname>
+                                setting, see
+                                <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+                                for details. Defaults to
+                                <option>none</option>. <varname>JobTimeoutRebootArgument=</varname>
+                                configures an optional reboot string
+                                to pass to the
+                                <citerefentry><refentrytitle>reboot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                system call.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
index 8fe83aefec62fdf9a7ca53e6616a6ccfa4535f39..5f2276af9c1ca92d451bba3c9e4d8d5dbb40e560 100644 (file)
@@ -33,6 +33,7 @@
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
 
 static int property_get_names(
                 sd_bus *bus,
@@ -562,6 +563,8 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("IgnoreOnSnapshot", "b", bus_property_get_bool, offsetof(Unit, ignore_on_snapshot), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, 0, 0),
index ef5dbce1a336af9464db91c34d77de8d39c5895a..aa205ba781b0066c2289e64e1a239498b73bbfca 100644 (file)
@@ -858,14 +858,18 @@ finish:
 
 static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *userdata) {
         Job *j = userdata;
+        Unit *u;
 
         assert(j);
         assert(s == j->timer_event_source);
 
-        log_warning_unit(j->unit->id, "Job %s/%s timed out.",
-                         j->unit->id, job_type_to_string(j->type));
+        log_warning_unit(j->unit->id, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
 
+        u = j->unit;
         job_finish_and_invalidate(j, JOB_TIMEOUT, true);
+
+        failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+
         return 0;
 }
 
index 8805411f28d65105af14aeb3b7ea3c30ea5273ea..26d40fbc6d38e072a26380dfe1a7e3a8f86bed46 100644 (file)
@@ -152,6 +152,8 @@ Unit.OnFailureIsolate,           config_parse_job_mode_isolate,      0,
 Unit.IgnoreOnIsolate,            config_parse_bool,                  0,                             offsetof(Unit, ignore_on_isolate)
 Unit.IgnoreOnSnapshot,           config_parse_bool,                  0,                             offsetof(Unit, ignore_on_snapshot)
 Unit.JobTimeoutSec,              config_parse_sec,                   0,                             offsetof(Unit, job_timeout)
+Unit.JobTimeoutAction,           config_parse_failure_action,        0,                             offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutRebootArgument,   config_parse_string,                0,                             offsetof(Unit, job_timeout_reboot_arg)
 Unit.ConditionPathExists,        config_parse_unit_condition_path,   CONDITION_PATH_EXISTS,         0
 Unit.ConditionPathExistsGlob,    config_parse_unit_condition_path,   CONDITION_PATH_EXISTS_GLOB,    0
 Unit.ConditionPathIsDirectory,   config_parse_unit_condition_path,   CONDITION_PATH_IS_DIRECTORY,   0
index e40e6f2068c682f58e135f254cf65202f84351d6..afb760d4a3ecd334e2fd435beeac780fb68f2e7c 100644 (file)
@@ -520,6 +520,8 @@ void unit_free(Unit *u) {
         strv_free(u->dropin_paths);
         free(u->instance);
 
+        free(u->job_timeout_reboot_arg);
+
         set_free_free(u->names);
 
         unit_unwatch_all_pids(u);
@@ -921,6 +923,12 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         if (u->job_timeout > 0)
                 fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
 
+        if (u->job_timeout_action != FAILURE_ACTION_NONE)
+                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+
+        if (u->job_timeout_reboot_arg)
+                fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
+
         condition_dump_list(u->conditions, f, prefix);
 
         if (dual_timestamp_is_set(&u->condition_timestamp))
index 43ab4d1045de148a8e86949cb8d99476326b6e75..bbad546356c6114c08a2e2df4933c5b08d273661 100644 (file)
@@ -41,6 +41,7 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
 #include "condition.h"
 #include "install.h"
 #include "unit-name.h"
+#include "failure-action.h"
 
 enum UnitActiveState {
         UNIT_ACTIVE,
@@ -112,7 +113,10 @@ struct Unit {
         /* JOB_NOP jobs are special and can be installed without disturbing the real job. */
         Job *nop_job;
 
+        /* Job timeout and action to take */
         usec_t job_timeout;
+        FailureAction job_timeout_action;
+        char *job_timeout_reboot_arg;
 
         /* References to this */
         LIST_HEAD(UnitRef, refs);