chiark / gitweb /
job: optionally, when a job timeout is hit, also execute a failure action
author Lennart Poettering <lennart@poettering.net>
Tue, 28 Oct 2014 00:49:07 +0000 (01:49 +0100)
committer Lennart Poettering <lennart@poettering.net>
Tue, 28 Oct 2014 01:19:55 +0000 (02:19 +0100)
man/systemd.unit.xml
src/core/dbus-unit.c
src/core/job.c
src/core/load-fragment-gperf.gperf.m4
src/core/unit.c
src/core/unit.h

index 88c9d7f..803eff2 100644 (file)
 
                         <varlistentry>
                                 <term><varname>JobTimeoutSec=</varname></term>
-
-                                <listitem><para>When clients are
-                                waiting for a job of this unit to
-                                complete, time out after the specified
-                                time. If this time limit is reached,
-                                the job will be cancelled, the unit
-                                however will not change state or even
-                                enter the <literal>failed</literal>
-                                mode. This value defaults to 0 (job
-                                timeouts disabled), except for device
+                                <term><varname>JobTimeoutAction=</varname></term>
+                                <term><varname>JobTimeoutRebootArgument=</varname></term>
+
+                                <listitem><para>When a job for this
+                                unit is queued, a time-out may be
+                                configured. If this time limit is
+                                reached, the job will be cancelled,
+                                the unit however will not change state
+                                or even enter the
+                                <literal>failed</literal> mode. This
+                                value defaults to 0 (job timeouts
+                                disabled), except for device
                                 units. NB: this timeout is independent
                                 from any unit-specific timeout (for
                                 example, the timeout set with
-                                <varname>Timeout=</varname> in service
+                                <varname>StartTimeoutSec=</varname> in service
                                 units) as the job timeout has no
                                 effect on the unit itself, only on the
                                 job that might be pending for it. Or
                                 timeout set with this option however
                                 is useful to abort only the job
                                 waiting for the unit state to
-                                change.</para></listitem>
+                                change.</para>
+
+                                <para><varname>JobTimeoutAction=</varname>
+                                optionally configures an additional
+                                action to take when the time-out is
+                                hit. It takes the same values as the
+                                per-service
+                                <varname>StartLimitAction=</varname>
+                                setting, see
+                                <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+                                for details. Defaults to
+                                <option>none</option>. <varname>JobTimeoutRebootArgument=</varname>
+                                configures an optional reboot string
+                                to pass to the
+                                <citerefentry><refentrytitle>reboot</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                system call.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
index 8fe83ae..5f2276a 100644 (file)
@@ -33,6 +33,7 @@
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_load_state, unit_load_state, UnitLoadState);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_job_mode, job_mode, JobMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_failure_action, failure_action, FailureAction);
 
 static int property_get_names(
                 sd_bus *bus,
@@ -562,6 +563,8 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("IgnoreOnSnapshot", "b", bus_property_get_bool, offsetof(Unit, ignore_on_snapshot), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NeedDaemonReload", "b", property_get_need_daemon_reload, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JobTimeoutUSec", "t", bus_property_get_usec, offsetof(Unit, job_timeout), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutAction", "s", property_get_failure_action, offsetof(Unit, job_timeout_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("JobTimeoutRebootArgument", "s", NULL, offsetof(Unit, job_timeout_reboot_arg), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ConditionResult", "b", bus_property_get_bool, offsetof(Unit, condition_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_PROPERTY_DUAL_TIMESTAMP("ConditionTimestamp", offsetof(Unit, condition_timestamp), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("Conditions", "a(sbbsi)", property_get_conditions, 0, 0),
index ef5dbce..aa205ba 100644 (file)
@@ -858,14 +858,18 @@ finish:
 
 static int job_dispatch_timer(sd_event_source *s, uint64_t monotonic, void *userdata) {
         Job *j = userdata;
+        Unit *u;
 
         assert(j);
         assert(s == j->timer_event_source);
 
-        log_warning_unit(j->unit->id, "Job %s/%s timed out.",
-                         j->unit->id, job_type_to_string(j->type));
+        log_warning_unit(j->unit->id, "Job %s/%s timed out.", j->unit->id, job_type_to_string(j->type));
 
+        u = j->unit;
         job_finish_and_invalidate(j, JOB_TIMEOUT, true);
+
+        failure_action(u->manager, u->job_timeout_action, u->job_timeout_reboot_arg);
+
         return 0;
 }
 
index 8805411..26d40fb 100644 (file)
@@ -152,6 +152,8 @@ Unit.OnFailureIsolate,           config_parse_job_mode_isolate,      0,
 Unit.IgnoreOnIsolate,            config_parse_bool,                  0,                             offsetof(Unit, ignore_on_isolate)
 Unit.IgnoreOnSnapshot,           config_parse_bool,                  0,                             offsetof(Unit, ignore_on_snapshot)
 Unit.JobTimeoutSec,              config_parse_sec,                   0,                             offsetof(Unit, job_timeout)
+Unit.JobTimeoutAction,           config_parse_failure_action,        0,                             offsetof(Unit, job_timeout_action)
+Unit.JobTimeoutRebootArgument,   config_parse_string,                0,                             offsetof(Unit, job_timeout_reboot_arg)
 Unit.ConditionPathExists,        config_parse_unit_condition_path,   CONDITION_PATH_EXISTS,         0
 Unit.ConditionPathExistsGlob,    config_parse_unit_condition_path,   CONDITION_PATH_EXISTS_GLOB,    0
 Unit.ConditionPathIsDirectory,   config_parse_unit_condition_path,   CONDITION_PATH_IS_DIRECTORY,   0
index e40e6f2..afb760d 100644 (file)
@@ -520,6 +520,8 @@ void unit_free(Unit *u) {
         strv_free(u->dropin_paths);
         free(u->instance);
 
+        free(u->job_timeout_reboot_arg);
+
         set_free_free(u->names);
 
         unit_unwatch_all_pids(u);
@@ -921,6 +923,12 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         if (u->job_timeout > 0)
                 fprintf(f, "%s\tJob Timeout: %s\n", prefix, format_timespan(timespan, sizeof(timespan), u->job_timeout, 0));
 
+        if (u->job_timeout_action != FAILURE_ACTION_NONE)
+                fprintf(f, "%s\tJob Timeout Action: %s\n", prefix, failure_action_to_string(u->job_timeout_action));
+
+        if (u->job_timeout_reboot_arg)
+                fprintf(f, "%s\tJob Timeout Reboot Argument: %s\n", prefix, u->job_timeout_reboot_arg);
+
         condition_dump_list(u->conditions, f, prefix);
 
         if (dual_timestamp_is_set(&u->condition_timestamp))
index 43ab4d1..bbad546 100644 (file)
@@ -41,6 +41,7 @@ typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
 #include "condition.h"
 #include "install.h"
 #include "unit-name.h"
+#include "failure-action.h"
 
 enum UnitActiveState {
         UNIT_ACTIVE,
@@ -112,7 +113,10 @@ struct Unit {
         /* JOB_NOP jobs are special and can be installed without disturbing the real job. */
         Job *nop_job;
 
+        /* Job timeout and action to take */
         usec_t job_timeout;
+        FailureAction job_timeout_action;
+        char *job_timeout_reboot_arg;
 
         /* References to this */
         LIST_HEAD(UnitRef, refs);