chiark / gitweb /
service: add watchdog timestamp
authorMichael Olbrich <m.olbrich@pengutronix.de>
Wed, 1 Feb 2012 16:17:12 +0000 (17:17 +0100)
committerLennart Poettering <lennart@poettering.net>
Wed, 1 Feb 2012 18:29:19 +0000 (19:29 +0100)
This patch adds WatchdogTimestamp[Monotonic] to the systemd service
D-Bus API. The timestamp is updated to the current time when the
service calls 'sd_notify("WATCHDOG=1\n")'.
Using a timestamp instead of an 'alive' flag has two advantages:
1. No timeout is needed to define when a service is no longer alive.
   This simplifies both configuration (no timeout value) and
   implementation (no timeout event).
2. It is more robust. A 'dead' service might not be detected should
    systemd 'forget' to reset an 'alive' flag. It is much less likely
    to get a valid new timestamp if a service died.

man/sd_notify.xml
src/dbus-service.c
src/service.c
src/service.h
src/systemd/sd-daemon.h

index 0209146..9797a5f 100644 (file)
                                 itself. Example:
                                 "MAINPID=4711"</para></listitem>
                         </varlistentry>
+
+                        <varlistentry>
+                                <term>WATCHDOG=1</term>
+
+                                <listitem><para>Tells systemd to
+                                update the watchdog timestamp.
+                                Services using this feature should do
+                                this at regular intervals. A watchdog
+                                framework can use the timestamps to
+                                detect failed
+                                services.</para></listitem>
+                        </varlistentry>
                 </variablelist>
 
                 <para>It is recommended to prefix variable names that
index e1f6370..5d95b37 100644 (file)
@@ -43,6 +43,8 @@
         "  <property name=\"NotifyAccess\" type=\"s\" access=\"read\"/>\n" \
         "  <property name=\"RestartUSec\" type=\"t\" access=\"read\"/>\n" \
         "  <property name=\"TimeoutUSec\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogTimestamp\" type=\"t\" access=\"read\"/>\n" \
+        "  <property name=\"WatchdogTimestampMonotonic\" type=\"t\" access=\"read\"/>\n" \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPre")                      \
         BUS_EXEC_COMMAND_INTERFACE("ExecStart")                         \
         BUS_EXEC_COMMAND_INTERFACE("ExecStartPost")                     \
@@ -86,6 +88,8 @@ const char bus_service_invalidating_properties[] =
         "ExecStop\0"
         "ExecStopPost\0"
         "ExecMain\0"
+        "WatchdogTimestamp\0"
+        "WatchdogTimestampMonotonic\0"
         "MainPID\0"
         "ControlPID\0"
         "StatusText\0";
@@ -106,32 +110,34 @@ static const BusProperty bus_exec_main_status_properties[] = {
 };
 
 static const BusProperty bus_service_properties[] = {
-        { "Type",                   bus_service_append_type,          "s", offsetof(Service, type)                      },
-        { "Restart",                bus_service_append_restart,       "s", offsetof(Service, restart)                   },
-        { "PIDFile",                bus_property_append_string,       "s", offsetof(Service, pid_file),            true },
-        { "NotifyAccess",           bus_service_append_notify_access, "s", offsetof(Service, notify_access)             },
-        { "RestartUSec",            bus_property_append_usec,         "t", offsetof(Service, restart_usec)              },
-        { "TimeoutUSec",            bus_property_append_usec,         "t", offsetof(Service, timeout_usec)              },
+        { "Type",                   bus_service_append_type,          "s", offsetof(Service, type)                         },
+        { "Restart",                bus_service_append_restart,       "s", offsetof(Service, restart)                      },
+        { "PIDFile",                bus_property_append_string,       "s", offsetof(Service, pid_file),               true },
+        { "NotifyAccess",           bus_service_append_notify_access, "s", offsetof(Service, notify_access)                },
+        { "RestartUSec",            bus_property_append_usec,         "t", offsetof(Service, restart_usec)                 },
+        { "TimeoutUSec",            bus_property_append_usec,         "t", offsetof(Service, timeout_usec)                 },
+        { "WatchdogTimestamp",      bus_property_append_usec,         "t", offsetof(Service, watchdog_timestamp.realtime)  },
+        { "WatchdogTimestampMonotonic",bus_property_append_usec,      "t", offsetof(Service, watchdog_timestamp.monotonic) },
         BUS_EXEC_COMMAND_PROPERTY("ExecStartPre",  offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]),  true ),
         BUS_EXEC_COMMAND_PROPERTY("ExecStart",     offsetof(Service, exec_command[SERVICE_EXEC_START]),      true ),
         BUS_EXEC_COMMAND_PROPERTY("ExecStartPost", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), true ),
         BUS_EXEC_COMMAND_PROPERTY("ExecReload",    offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]),     true ),
         BUS_EXEC_COMMAND_PROPERTY("ExecStop",      offsetof(Service, exec_command[SERVICE_EXEC_STOP]),       true ),
         BUS_EXEC_COMMAND_PROPERTY("ExecStopPost",  offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]),  true ),
-        { "PermissionsStartOnly",   bus_property_append_bool,         "b", offsetof(Service, permissions_start_only)    },
-        { "RootDirectoryStartOnly", bus_property_append_bool,         "b", offsetof(Service, root_directory_start_only) },
-        { "RemainAfterExit",        bus_property_append_bool,         "b", offsetof(Service, remain_after_exit)         },
-        { "GuessMainPID",           bus_property_append_bool,         "b", offsetof(Service, guess_main_pid)            },
-        { "MainPID",                bus_property_append_pid,          "u", offsetof(Service, main_pid)                  },
-        { "ControlPID",             bus_property_append_pid,          "u", offsetof(Service, control_pid)               },
-        { "BusName",                bus_property_append_string,       "s", offsetof(Service, bus_name),            true },
-        { "StatusText",             bus_property_append_string,       "s", offsetof(Service, status_text),         true },
+        { "PermissionsStartOnly",   bus_property_append_bool,         "b", offsetof(Service, permissions_start_only)       },
+        { "RootDirectoryStartOnly", bus_property_append_bool,         "b", offsetof(Service, root_directory_start_only)    },
+        { "RemainAfterExit",        bus_property_append_bool,         "b", offsetof(Service, remain_after_exit)            },
+        { "GuessMainPID",           bus_property_append_bool,         "b", offsetof(Service, guess_main_pid)               },
+        { "MainPID",                bus_property_append_pid,          "u", offsetof(Service, main_pid)                     },
+        { "ControlPID",             bus_property_append_pid,          "u", offsetof(Service, control_pid)                  },
+        { "BusName",                bus_property_append_string,       "s", offsetof(Service, bus_name),               true },
+        { "StatusText",             bus_property_append_string,       "s", offsetof(Service, status_text),            true },
 #ifdef HAVE_SYSV_COMPAT
-        { "SysVRunLevels",          bus_property_append_string,       "s", offsetof(Service, sysv_runlevels),      true },
-        { "SysVStartPriority",      bus_property_append_int,          "i", offsetof(Service, sysv_start_priority)       },
-        { "SysVPath",               bus_property_append_string,       "s", offsetof(Service, sysv_path),           true },
+        { "SysVRunLevels",          bus_property_append_string,       "s", offsetof(Service, sysv_runlevels),         true },
+        { "SysVStartPriority",      bus_property_append_int,          "i", offsetof(Service, sysv_start_priority)          },
+        { "SysVPath",               bus_property_append_string,       "s", offsetof(Service, sysv_path),              true },
 #endif
-        { "FsckPassNo",             bus_property_append_int,          "i", offsetof(Service, fsck_passno)               },
+        { "FsckPassNo",             bus_property_append_int,          "i", offsetof(Service, fsck_passno)                  },
         { NULL, }
 };
 
index 4dcd306..d2a2dfc 100644 (file)
@@ -205,6 +205,19 @@ static void service_connection_unref(Service *s) {
         unit_ref_unset(&s->accept_socket);
 }
 
+static void service_stop_watchdog(Service *s) {
+        assert(s);
+
+        s->watchdog_timestamp.realtime = 0;
+        s->watchdog_timestamp.monotonic = 0;
+}
+
+static void service_reset_watchdog(Service *s) {
+        assert(s);
+
+        dual_timestamp_get(&s->watchdog_timestamp);
+}
+
 static void service_done(Unit *u) {
         Service *s = SERVICE(u);
 
@@ -1476,6 +1489,9 @@ static void service_set_state(Service *s, ServiceState state) {
                 service_connection_unref(s);
         }
 
+        if (state == SERVICE_STOP)
+                service_stop_watchdog(s);
+
         /* For the inactive states unit_notify() will trim the cgroup,
          * but for exit we have to do that ourselves... */
         if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0)
@@ -2411,6 +2427,8 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
                         unit_serialize_item_format(u, f, "main-exec-status-status", "%i", s->main_exec_status.status);
                 }
         }
+        if (dual_timestamp_is_set(&s->watchdog_timestamp))
+                dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp);
 
         return 0;
 }
@@ -2511,6 +2529,8 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp);
         else if (streq(key, "main-exec-status-exit"))
                 dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp);
+        else if (streq(key, "watchdog-timestamp"))
+                dual_timestamp_deserialize(value, &s->watchdog_timestamp);
         else
                 log_debug("Unknown serialization key '%s'", key);
 
@@ -3069,6 +3089,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
                 }
 
         }
+        if (strv_find(tags, "WATCHDOG=1")) {
+                log_debug("%s: got WATCHDOG=1", u->id);
+                service_reset_watchdog(s);
+        }
 
         /* Notify clients about changed status or main pid */
         unit_add_to_dbus_queue(u);
index 0b4f8be..dbae68b 100644 (file)
@@ -100,6 +100,8 @@ struct Service {
         usec_t restart_usec;
         usec_t timeout_usec;
 
+        dual_timestamp watchdog_timestamp;
+
         ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX];
         ExecContext exec_context;
 
index eb2a606..7b664bf 100644 (file)
@@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path);
      MAINPID=...  The main pid of a daemon, in case systemd did not
                   fork off the process itself. Example: "MAINPID=4711"
 
+     WATCHDOG=1   Tells systemd to update the watchdog timestamp.
+                  Services using this feature should do this at
+                  regular intervals. A watchdog framework can use the
+                  timestamps to detect failed services.
+
   Daemons can choose to send additional variables. However, it is
   recommended to prefix variable names not listed above with X_.