From 5e0d6d688cd6164aabf49e3efefff6b78983e07d Mon Sep 17 00:00:00 2001 From: Alexis Bauvin Date: Fri, 18 Oct 2024 11:36:08 +0200 Subject: [PATCH] watchfrr: force kill daemons on restart Today, watchfrr sends a SIGINT (kill -2) to a misbehaving daemon through frrcommon. The issue is, a stuck daemon (like in a thread starvation situation) will not honor a SIGINT, and watchfrr will try indefinitely to kill it. Let's not waste time and kill -9 from the get go. Signed-off-by: Tuetuopay --- tools/frrcommon.sh.in | 6 ++++-- watchfrr/watchfrr.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/frrcommon.sh.in b/tools/frrcommon.sh.in index 44d41956b3a3..5cf7a24a494f 100755 --- a/tools/frrcommon.sh.in +++ b/tools/frrcommon.sh.in @@ -212,8 +212,10 @@ daemon_stop() { return 1 fi - debug "kill -2 $pid" - kill -2 "$pid" + [ "$2" = "SIGKILL" ] && signal=-9 || signal=-2 + + debug "kill $signal $pid" + kill "$signal" "$pid" cnt=1200 while kill -0 "$pid" 2>/dev/null; do sleep .1 diff --git a/watchfrr/watchfrr.c b/watchfrr/watchfrr.c index acc612c0a858..417769143efd 100644 --- a/watchfrr/watchfrr.c +++ b/watchfrr/watchfrr.c @@ -50,7 +50,7 @@ #define DEFAULT_MAX_RESTART 600 #define DEFAULT_OPERATIONAL_TIMEOUT 60 -#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s" +#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s SIGKILL" #define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s" #define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"