chiark / gitweb /
core: add EXTEND_TIMEOUT_USEC={usec} - prevent timeouts in startup/runtime/shutdown...
authorDaniel Black <danielgb@au.ibm.com>
Thu, 14 Dec 2017 11:17:43 +0000 (22:17 +1100)
committerSven Eden <yamakuzure@gmx.net>
Thu, 14 Dec 2017 11:17:43 +0000 (22:17 +1100)
With Type=notify services, EXTEND_TIMEOUT_USEC= messages will delay any startup/
runtime/shutdown timeouts.

A service that hasn't timed out, i.e, start time < TimeStartSec,
runtime < RuntimeMaxSec and stop time < TimeoutStopSec, may by sending
EXTEND_TIMEOUT_USEC=, allow the service to continue beyond the limit for
the execution phase (i.e TimeStartSec, RunTimeMaxSec and TimeoutStopSec).

EXTEND_TIMEOUT_USEC= must continue to be sent (in the same way as
WATCHDOG=1) within the time interval specified to continue to reprevent
the timeout from occuring.

Watchdog timeouts are also extended if a EXTEND_TIMEOUT_USEC is greater
than the remaining time on the watchdog counter.

Fixes #5868.

man/sd_notify.xml
test/TEST-16-EXTEND-TIMEOUT/assess.sh [new file with mode: 0755]
test/TEST-16-EXTEND-TIMEOUT/extend_timeout_test_service.sh [new file with mode: 0755]
test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-runtime.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-start.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-stop.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-success-all.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-success-runtime.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-success-start.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite-success-stop.service [new file with mode: 0644]
test/TEST-16-EXTEND-TIMEOUT/testsuite.service [new file with mode: 0644]

index 366ed5da6d5a67ce034c30def8d703e567c78d57..5829fc3a1e3b472908dffd7781b813b2d2e1f03e 100644 (file)
         Example : <literal>WATCHDOG_USEC=20000000</literal></para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term>EXTEND_TIMEOUT_USEC=…</term>
+
+        <listitem><para>Tells the service manager to extend the startup, runtime or shutdown service timeout
+        corresponding the current state. The value specified is a time in microseconds during which the service must
+        send a new message. A service timeout will occur if the message isn't received, but only if the runtime of the
+        current state is beyond the original maximium times of <varname>TimeoutStartSec=</varname>, <varname>RuntimeMaxSec=</varname>,
+        and <varname>TimeoutStopSec=</varname>.
+        See <citerefentry><refentrytitle>elogind.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+        for effects on the service timeouts.</para></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term>FDSTORE=1</term>
 
diff --git a/test/TEST-16-EXTEND-TIMEOUT/assess.sh b/test/TEST-16-EXTEND-TIMEOUT/assess.sh
new file mode 100755 (executable)
index 0000000..e7f643f
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+set -v -x
+
+rm -f /test.log
+
+TL=/test.log.XXXXXXXX
+
+function wait_for()
+{
+    service=${1}
+    result=${2:-success}
+    time=${3:-45}
+
+    while [[ ! -f /${service}.terminated && ! -f /${service}.success && $time -gt 0  ]]
+    do
+        sleep 1
+        time=$(( $time - 1 ))
+    done
+
+    if [[ ! -f /${service}.${result} ]]
+    then
+        journalctl -u testsuite-${service/_/-}.service >> "${TL}"
+    fi
+}
+
+# This checks all stages, start, runtime and stop, can be extended by
+# EXTEND_TIMEOUT_USEC
+
+wait_for success_all
+
+# These check that EXTEND_TIMEOUT_USEC that occurs at greater than the
+# extend timeout interval but less then the stage limit (TimeoutStartSec,
+# RuntimeMaxSec, TimeoutStopSec) still succeed.
+
+wait_for success_start
+wait_for success_runtime
+wait_for success_stop
+
+# These ensure that EXTEND_TIMEOUT_USEC will still timeout in the
+# approprate stage, after the stage limit, when the EXTEND_TIMEOUT_USEC
+# message isn't sent within the extend timeout interval.
+
+wait_for fail_start startfail
+wait_for fail_stop stopfail
+wait_for fail_runtime runtimefail
+
+if [[ -f "${TL}" ]]
+then
+    # no mv
+    cp "${TL}" /test.log
+    exit 1
+else
+    touch /testok
+    exit 0
+fi
diff --git a/test/TEST-16-EXTEND-TIMEOUT/extend_timeout_test_service.sh b/test/TEST-16-EXTEND-TIMEOUT/extend_timeout_test_service.sh
new file mode 100755 (executable)
index 0000000..39847ec
--- /dev/null
@@ -0,0 +1,70 @@
+#!/bin/bash
+set -x
+set -e
+set -o pipefail
+
+# sleep interval (seconds)
+sleep_interval=1
+# extend_timeout_interval second(s)
+extend_timeout_interval=1
+# number of sleep_intervals before READY=1
+start_intervals=10
+# number of sleep_intervals before exiting
+stop_intervals=10
+# run intervals, number of sleep_intervals to run
+run_intervals=7
+# service name
+SERVICE=unknown
+
+while [ $# -gt 0 ];
+do
+    eval ${1%=*}=${1#*=}
+    shift
+done
+
+# We convert to usec
+extend_timeout_interval=$(( $extend_timeout_interval * 1000000 ))
+
+trap "{ touch /${SERVICE}.terminated; exit 1; }"  SIGTERM SIGABRT
+
+rm -f /${SERVICE}.*
+touch /${SERVICE}.startfail
+
+elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+while [ $start_intervals -gt 0 ]
+do
+    sleep $sleep_interval
+    start_intervals=$(( $start_intervals - 1 ))
+    elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+done
+
+elogind-notify --ready --status="Waiting for your request"
+
+touch /${SERVICE}.runtimefail
+rm /${SERVICE}.startfail
+
+elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+while [ $run_intervals -gt 0 ]
+do
+    sleep $sleep_interval
+    run_intervals=$(( $run_intervals - 1 ))
+    elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+done
+
+elogind-notify STOPPING=1
+
+touch /${SERVICE}.stopfail
+rm /${SERVICE}.runtimefail
+
+elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+while [ $stop_intervals -gt 0 ]
+do
+    sleep $sleep_interval
+    stop_intervals=$(( $stop_intervals - 1 ))
+    elogind-notify EXTEND_TIMEOUT_USEC=$extend_timeout_interval
+done
+
+touch /${SERVICE}.success
+rm /${SERVICE}.stopfail
+
+exit 0
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-runtime.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-runtime.service
new file mode 100644 (file)
index 0000000..e0b9f6a
--- /dev/null
@@ -0,0 +1,13 @@
+
+[Unit]
+Description=Testsuite: Fail Runtime (EXTEND_TIMEOUT_USEC Didn't occur in sufficient time after RuntimeSecMax.)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC on runtime start (0) and 7 seconds after. Systemd will expect one at 7+5 (extend_timeout_interval)
+# seconds this won't happen until 7 + 7 (sleep interval) seconds. Therefore timeout at 12 seconds.
+Type=notify
+TimeoutStartSec=4
+TimeoutStopSec=4
+RuntimeMaxSec=10
+ExecStart=/extend_timeout_test_service.sh SERVICE=fail_runtime extend_timeout_interval=5 sleep_interval=7 start_intervals=0 run_intervals=2 stop_intervals=0
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-start.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-start.service
new file mode 100644 (file)
index 0000000..c3fcf23
--- /dev/null
@@ -0,0 +1,13 @@
+
+[Unit]
+Description=Testsuite: Fail Start (EXTEND_TIMEOUT_USEC Didn't occur in sufficient time after TimeoutStartSec.)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC on startup and 7 seconds from start. Systemd will expect one at 7+5 (extend_timeout_interval)
+# seconds  this won't happen until 7 + 7 (sleep interval) seconds. Therefore timeout at 12 seconds.
+Type=notify
+TimeoutStartSec=10
+TimeoutStopSec=4
+RuntimeMaxSec=4
+ExecStart=/extend_timeout_test_service.sh SERVICE=fail_start extend_timeout_interval=5 sleep_interval=7 start_intervals=2 run_intervals=0 stop_intervals=0
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-stop.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-fail-stop.service
new file mode 100644 (file)
index 0000000..ce76d10
--- /dev/null
@@ -0,0 +1,16 @@
+
+[Unit]
+Description=Testsuite: Fail Stop (EXTEND_TIMEOUT_USEC Didn't occur in sufficient time after TimeoutStopSec.)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC on stop (0) and 7 seconds after. Systemd will expect one at 7+5 (extend_timeout_interval)
+# seconds this won't happen until 7 + 7 (sleep interval) seconds. Therefore timeout at 12 seconds.
+Type=notify
+TimeoutStartSec=4
+TimeoutStopSec=10
+RuntimeMaxSec=4
+ExecStart=/extend_timeout_test_service.sh SERVICE=fail_stop extend_timeout_interval=5 sleep_interval=7 start_intervals=0 run_intervals=0 stop_intervals=2
+# Due to 6041a7ee2c1bbff6301082f192fc1b0882400d42 SIGTERM isn't sent as the service shuts down with STOPPING=1
+# This file makes the test assess.sh quicker by notifing it that this test has finished.
+ExecStopPost=/bin/bash -c '[[ $SERVICE_RESULT == timeout && $EXIT_CODE == killed ]] && touch /fail_runtime.terminated'
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-all.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-all.service
new file mode 100644 (file)
index 0000000..666f422
--- /dev/null
@@ -0,0 +1,14 @@
+
+[Unit]
+Description=Testsuite: EXTEND_TIMEOUT_USEC Success - extend timeout on all services
+
+[Service]
+
+# Normal success - startup / runtime / shutdown all take 8 seconds which is within the EXTEND_TIMEOUT_USEC=4 seconds interval
+# runtime is 8+8+8 seconds. so we are relying on the EXTEND_TIMEOUT_USEC to exceed all stages, Start, Runtime and Stop.
+# success occurs after 24 seconds
+Type=notify
+TimeoutStartSec=4
+TimeoutStopSec=4
+RuntimeMaxSec=4
+ExecStart=/extend_timeout_test_service.sh SERVICE=success_all extend_timeout_interval=4 sleep_interval=2 start_intervals=3 run_intervals=3 stop_intervals=3
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-runtime.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-runtime.service
new file mode 100644 (file)
index 0000000..dc226f5
--- /dev/null
@@ -0,0 +1,13 @@
+
+[Unit]
+Description=Testsuite: Success Runtime (EXTEND_TIMEOUT_USEC > WATCHDOG_USEC however < RuntimeMaxSec)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC=4 second once during runtime, but sleep for 6 seconds.
+# Runtime is 6 seconds and < RuntimeMaxSec so still successful.
+Type=notify
+TimeoutStartSec=4
+TimeoutStopSec=4
+RuntimeMaxSec=8
+ExecStart=/extend_timeout_test_service.sh SERVICE=success_runtime extend_timeout_interval=4 sleep_interval=6 start_intervals=0 run_intervals=1 stop_intervals=0
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-start.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-start.service
new file mode 100644 (file)
index 0000000..228eece
--- /dev/null
@@ -0,0 +1,13 @@
+
+[Unit]
+Description=Testsuite: Success Start (EXTEND_TIMEOUT_USEC > WATCHDOG_USEC however < TimeoutStartSec)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC=4 second interval once at startup, but sleep 6 seconds.
+# Therefore startup is 6 seconds and < TimeoutStartSec so still successful.
+Type=notify
+TimeoutStartSec=8
+TimeoutStopSec=4
+RuntimeMaxSec=4
+ExecStart=/extend_timeout_test_service.sh SERVICE=success_start extend_timeout_interval=4 sleep_interval=6 start_intervals=1 run_intervals=0 stop_intervals=0
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-stop.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite-success-stop.service
new file mode 100644 (file)
index 0000000..b809397
--- /dev/null
@@ -0,0 +1,13 @@
+
+[Unit]
+Description=Testsuite: Success Stop (EXTEND_TIMEOUT_USEC > WATCHDOG_USEC however < TimeoutStopSec)
+
+[Service]
+
+# EXTEND_TIMEOUT_USEC=4 seconds once during shutdown, but sleep for 6 seconds.
+# Therefore stop time is 6 seconds and < TimeoutStopSec so still successful.
+Type=notify
+TimeoutStartSec=4
+TimeoutStopSec=8
+RuntimeMaxSec=4
+ExecStart=/extend_timeout_test_service.sh SERVICE=success_stop extend_timeout_interval=4 sleep_interval=6 start_intervals=0 run_intervals=0 stop_intervals=1
diff --git a/test/TEST-16-EXTEND-TIMEOUT/testsuite.service b/test/TEST-16-EXTEND-TIMEOUT/testsuite.service
new file mode 100644 (file)
index 0000000..e1cd5ca
--- /dev/null
@@ -0,0 +1,18 @@
+
+[Unit]
+Description=Testsuite: Assess all other testsuite-*.services worked as expected
+
+Wants=testsuite-success-all.service
+Wants=testsuite-success-start.service
+Wants=testsuite-success-runtime.service
+Wants=testsuite-success-stop.service
+Wants=testsuite-fail-start.service
+Wants=testsuite-fail-stop.service
+Wants=testsuite-fail-runtime.service
+StopWhenUnneeded=yes
+
+[Service]
+
+Type=simple
+ExecStartPre=/assess.sh
+ExecStart=/bin/true