From db2cb23b5b179707000d28a11efb3d888d06ee80 Mon Sep 17 00:00:00 2001 From: Umut Tezduyar Lindskog Date: Tue, 28 Oct 2014 16:35:40 +0100 Subject: [PATCH] core: send SIGABRT on watchdog timeout to get the stacktrace. If SIGABRT doesn't do the job, follow the regular shutdown routine: SIGTERM, then SIGKILL. --- TODO | 2 -- man/systemd.service.xml | 5 +++-- src/core/busname.c | 2 +- src/core/mount.c | 3 ++- src/core/scope.c | 2 +- src/core/service.c | 37 ++++++++++++++++++++++++++----------- src/core/service.h | 1 + src/core/socket.c | 3 ++- src/core/swap.c | 3 ++- src/core/unit.c | 24 ++++++++++++++++++------ src/core/unit.h | 8 +++++++- 11 files changed, 63 insertions(+), 27 deletions(-) diff --git a/TODO b/TODO index 10b22588d..b07d66471 100644 --- a/TODO +++ b/TODO @@ -48,8 +48,6 @@ Features: * consider showing the unit names during boot up in the status output, not just the unit descriptions -* send SIGABRT when a service watchdog is triggered, by default, so that we acquire a backtrace of the hang. - * dhcp: do we allow configuring dhcp routes on interfaces that are not the one we got the dhcp info from? * maybe allow timer units with an empty Units= setting, so that they diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 115d1692e..e563b1968 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -593,8 +593,9 @@ (i.e. the "keep-alive ping"). If the time between two such calls is larger than the configured time, then the service - is placed in a failed state. By - setting Restart= to + is placed in a failed state and it will + be terminated with SIGABRT. + By setting Restart= to or , the service will be automatically restarted. 
The diff --git a/src/core/busname.c b/src/core/busname.c index 22d2a6d24..68cb6ca7b 100644 --- a/src/core/busname.c +++ b/src/core/busname.c @@ -446,7 +446,7 @@ static void busname_enter_signal(BusName *n, BusNameState state, BusNameResult f r = unit_kill_context(UNIT(n), &kill_context, - state != BUSNAME_SIGTERM, + state != BUSNAME_SIGTERM ? KILL_KILL : KILL_TERMINATE, -1, n->control_pid, false); diff --git a/src/core/mount.c b/src/core/mount.c index e284357c6..01243c381 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -775,7 +775,8 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) { r = unit_kill_context( UNIT(m), &m->kill_context, - state != MOUNT_MOUNTING_SIGTERM && state != MOUNT_UNMOUNTING_SIGTERM && state != MOUNT_REMOUNTING_SIGTERM, + (state != MOUNT_MOUNTING_SIGTERM && state != MOUNT_UNMOUNTING_SIGTERM && state != MOUNT_REMOUNTING_SIGTERM) ? + KILL_KILL : KILL_TERMINATE, -1, m->control_pid, false); diff --git a/src/core/scope.c b/src/core/scope.c index e8f9e8dd7..0f7c1f97c 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -243,7 +243,7 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SCOPE_STOP_SIGTERM, + state != SCOPE_STOP_SIGTERM ? 
KILL_KILL : KILL_TERMINATE, -1, -1, false); if (r < 0) goto fail; diff --git a/src/core/service.c b/src/core/service.c index d160c4e93..2b1677873 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -56,6 +56,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_EXITED] = UNIT_ACTIVE, [SERVICE_RELOAD] = UNIT_RELOADING, [SERVICE_STOP] = UNIT_DEACTIVATING, + [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, @@ -76,6 +77,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_EXITED] = UNIT_ACTIVE, [SERVICE_RELOAD] = UNIT_RELOADING, [SERVICE_STOP] = UNIT_DEACTIVATING, + [SERVICE_STOP_SIGABRT] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING, [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING, [SERVICE_STOP_POST] = UNIT_DEACTIVATING, @@ -663,7 +665,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_AUTO_RESTART)) s->timer_event_source = sd_event_source_unref(s->timer_event_source); @@ -672,7 +674,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { service_unwatch_main_pid(s); s->main_command = NULL; @@ -682,7 +684,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, 
SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { service_unwatch_control_pid(s); s->control_command = NULL; @@ -696,7 +698,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, - SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) && + SERVICE_STOP_SIGABRT, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) && !(state == SERVICE_DEAD && UNIT(s)->job)) { service_close_socket_fd(s); service_connection_unref(s); @@ -750,7 +752,7 @@ static int service_coldplug(Unit *u) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { usec_t k; @@ -779,7 +781,7 @@ static int service_coldplug(Unit *u) { SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) { r = unit_watch_pid(UNIT(s), s->main_pid); if (r < 0) @@ -791,7 +793,7 @@ static int service_coldplug(Unit *u) { SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, - SERVICE_STOP_POST, + SERVICE_STOP_SIGABRT, SERVICE_STOP_POST, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) { r = unit_watch_pid(UNIT(s), s->control_pid); if (r < 0) @@ -1181,7 +1183,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f r = unit_kill_context( UNIT(s), &s->kill_context, - state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM, + (state != SERVICE_STOP_SIGTERM && state != SERVICE_FINAL_SIGTERM && state != SERVICE_STOP_SIGABRT) ? + KILL_KILL : (state == SERVICE_STOP_SIGABRT ? 
KILL_ABORT : KILL_TERMINATE), s->main_pid, s->control_pid, s->main_pid_alien); @@ -1197,7 +1200,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f } service_set_state(s, state); - } else if (state == SERVICE_STOP_SIGTERM) + } else if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGABRT) service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_SUCCESS); else if (state == SERVICE_STOP_SIGKILL) service_enter_stop_post(s, SERVICE_SUCCESS); @@ -1211,7 +1214,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f fail: log_warning_unit(UNIT(s)->id, "%s failed to kill processes: %s", UNIT(s)->id, strerror(-r)); - if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL) + if (state == SERVICE_STOP_SIGTERM || state == SERVICE_STOP_SIGKILL || + state == SERVICE_STOP_SIGABRT) service_enter_stop_post(s, SERVICE_FAILURE_RESOURCES); else service_enter_dead(s, SERVICE_FAILURE_RESOURCES, true); @@ -1637,6 +1641,7 @@ static int service_start(Unit *u) { /* We cannot fulfill this request right now, try again later * please! 
*/ if (s->state == SERVICE_STOP || + s->state == SERVICE_STOP_SIGABRT || s->state == SERVICE_STOP_SIGTERM || s->state == SERVICE_STOP_SIGKILL || s->state == SERVICE_STOP_POST || @@ -1695,6 +1700,7 @@ static int service_stop(Unit *u) { /* Already on it */ if (s->state == SERVICE_STOP || + s->state == SERVICE_STOP_SIGABRT || s->state == SERVICE_STOP_SIGTERM || s->state == SERVICE_STOP_SIGKILL || s->state == SERVICE_STOP_POST || @@ -2126,6 +2132,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { service_enter_running(s, SERVICE_SUCCESS); break; + case SERVICE_STOP_SIGABRT: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: @@ -2252,6 +2259,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_running(s, f); break; + case SERVICE_STOP_SIGABRT: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: @@ -2392,6 +2400,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_signal(s, SERVICE_STOP_SIGTERM, f); break; + case SERVICE_STOP_SIGABRT: case SERVICE_STOP_SIGTERM: case SERVICE_STOP_SIGKILL: if (main_pid_good(s) <= 0) @@ -2461,6 +2470,12 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_TIMEOUT); break; + case SERVICE_STOP_SIGABRT: + log_warning_unit(UNIT(s)->id, + "%s stop-sigabrt timed out. Terminating.", UNIT(s)->id); + service_enter_signal(s, SERVICE_STOP_SIGTERM, s->result); + break; + case SERVICE_STOP_SIGTERM: if (s->kill_context.send_sigkill) { log_warning_unit(UNIT(s)->id, "%s stop-sigterm timed out. 
Killing.", UNIT(s)->id); @@ -2528,7 +2543,7 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void log_error_unit(UNIT(s)->id, "%s watchdog timeout (limit %s)!", UNIT(s)->id, format_timespan(t, sizeof(t), s->watchdog_usec, 1)); - service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_WATCHDOG); + service_enter_signal(s, SERVICE_STOP_SIGABRT, SERVICE_FAILURE_WATCHDOG); return 0; } diff --git a/src/core/service.h b/src/core/service.h index 0db0c4d64..54fbe46fa 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -39,6 +39,7 @@ typedef enum ServiceState { SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */ SERVICE_RELOAD, SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */ + SERVICE_STOP_SIGABRT, /* Watchdog timeout */ SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, diff --git a/src/core/socket.c b/src/core/socket.c index 9004cb42c..6ba8338d8 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1578,7 +1578,8 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SOCKET_STOP_PRE_SIGTERM && state != SOCKET_FINAL_SIGTERM, + (state != SOCKET_STOP_PRE_SIGTERM && state != SOCKET_FINAL_SIGTERM) ? + KILL_KILL : KILL_TERMINATE, -1, s->control_pid, false); diff --git a/src/core/swap.c b/src/core/swap.c index 13e12ad67..1add722bf 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -687,7 +687,8 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) { r = unit_kill_context( UNIT(s), &s->kill_context, - state != SWAP_ACTIVATING_SIGTERM && state != SWAP_DEACTIVATING_SIGTERM, + (state != SWAP_ACTIVATING_SIGTERM && state != SWAP_DEACTIVATING_SIGTERM) ? 
+ KILL_KILL : KILL_TERMINATE, -1, s->control_pid, false); diff --git a/src/core/unit.c b/src/core/unit.c index 489ea1e50..84f210a31 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3313,7 +3313,7 @@ int unit_make_transient(Unit *u) { int unit_kill_context( Unit *u, KillContext *c, - bool sigkill, + KillOperation k, pid_t main_pid, pid_t control_pid, bool main_pid_alien) { @@ -3326,7 +3326,19 @@ int unit_kill_context( if (c->kill_mode == KILL_NONE) return 0; - sig = sigkill ? SIGKILL : c->kill_signal; + switch (k) { + case KILL_KILL: + sig = SIGKILL; + break; + case KILL_ABORT: + sig = SIGABRT; + break; + case KILL_TERMINATE: + sig = c->kill_signal; + break; + default: + assert_not_reached("KillOperation unknown"); + } if (main_pid > 0) { r = kill_and_sigcont(main_pid, sig); @@ -3340,7 +3352,7 @@ int unit_kill_context( if (!main_pid_alien) wait_for_exit = true; - if (c->send_sighup && !sigkill) + if (c->send_sighup && k != KILL_KILL) kill(main_pid, SIGHUP); } } @@ -3356,12 +3368,12 @@ int unit_kill_context( } else { wait_for_exit = true; - if (c->send_sighup && !sigkill) + if (c->send_sighup && k != KILL_KILL) kill(control_pid, SIGHUP); } } - if ((c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && sigkill)) && u->cgroup_path) { + if ((c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL)) && u->cgroup_path) { _cleanup_set_free_ Set *pid_set = NULL; /* Exclude the main/control pids from being killed via the cgroup */ @@ -3385,7 +3397,7 @@ int unit_kill_context( /* wait_for_exit = true; */ - if (c->send_sighup && !sigkill) { + if (c->send_sighup && k != KILL_KILL) { set_free(pid_set); pid_set = unit_pid_set(main_pid, control_pid); diff --git a/src/core/unit.h b/src/core/unit.h index bbad54635..081ab18f1 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -54,6 +54,12 @@ enum UnitActiveState { _UNIT_ACTIVE_STATE_INVALID = -1 }; +typedef enum KillOperation { + KILL_TERMINATE, + KILL_KILL, + KILL_ABORT, +} 
KillOperation; + static inline bool UNIT_IS_ACTIVE_OR_RELOADING(UnitActiveState t) { return t == UNIT_ACTIVE || t == UNIT_RELOADING; } @@ -576,7 +582,7 @@ int unit_write_drop_in_private_format(Unit *u, UnitSetPropertiesMode mode, const int unit_remove_drop_in(Unit *u, UnitSetPropertiesMode mode, const char *name); -int unit_kill_context(Unit *u, KillContext *c, bool sigkill, pid_t main_pid, pid_t control_pid, bool main_pid_alien); +int unit_kill_context(Unit *u, KillContext *c, KillOperation k, pid_t main_pid, pid_t control_pid, bool main_pid_alien); int unit_make_transient(Unit *u); -- 2.30.2