Skip to content

Commit

Permalink
Fix #26: add support for multiple watchdog devices
Browse files Browse the repository at this point in the history
Signed-off-by: Joachim Wiberg <[email protected]>
  • Loading branch information
troglobit committed Jan 1, 2024
1 parent 21008de commit c0752d9
Show file tree
Hide file tree
Showing 6 changed files with 438 additions and 216 deletions.
44 changes: 41 additions & 3 deletions man/watchdogd.conf.5
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
The default
.Xr watchdogd 8
use-case does not require a configuration file. However, enabling a
health monitor plugin or the process supervisor is done using
health monitor plugin, the process supervisor, or multiple watchdog
device nodes, is done using
.Pa /etc/watchdogd.conf .
.Pp
Available monitor plugins are:
Available health monitor plugins:
.Bl -tag -width supervisor
.It Cm supervisor
Process supervisor, monitor the heartbeat of processes
Expand Down Expand Up @@ -78,6 +79,24 @@ script.sh {filenr, loadavg, meminfo} {crit, warn} VALUE
.Ed
.Pp
Health monitor plugins also have their own local script setting.
.It Cm device Ar /path/to/device Ar {}
.Nm watchdogd
supports kicking multiple watchdog devices. By default, and with no
command line arguments,
.Pa /dev/watchdog
is used. If that is your system, this section is not necessary. This
section is only useful if you want everything in the configuration file
or have multiple watchdog devices. See
.Sx EXAMPLE
section below.
.Bl -tag -width TERM
.It Cm timeout = Ar SEC
Same as global option.
.It Cm interval = Ar SEC
Same as global option.
.It Cm safe-exit = Ar true | false
Same as global option.
.El
.It Cm reset-reason Ar {}
This section controls the reset reason, including the reset counter. By
default this is disabled, since not all systems allow writing to disk,
Expand Down Expand Up @@ -429,17 +448,36 @@ active. The format is not guaranteed to be stable between releases, but
will most likely be anyway. See
.Ql /run/watchdogd/tempmon.json .
.Sh EXAMPLE
.Bd -unfilled -offset indent
.Bd -unfilled
### /etc/watchdogd.conf

### Watchdogs ##########################################################
# Global settings that can be overridden per watchdog
timeout = 20
interval = 10
safe-exit = false

# Multiple watchdogs can be kicked.  The default, even if no .conf file
# is found and no device node is given on the command line, is /dev/watchdog
device /dev/watchdog {
timeout = 20
interval = 10
safe-exit = false
}

#device /dev/watchdog2 {
# timeout = 20
# interval = 10
# safe-exit = false
#}

### Process Supervisor #################################################
supervisor {
enabled = true
priority = 98
}

### Reset Reason #######################################################
reset-reason {
enabled = true
file = "/var/lib/misc/watchdogd.state"
Expand Down
31 changes: 31 additions & 0 deletions src/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,26 @@ static int generic_checker(uev_ctx_t *ctx, cfg_t *cfg)
}
#endif

/*
 * Walk all titled sections named @sect in the parsed configuration and
 * hand each one over to wdt_add().
 *
 * For every section the title is used as the watchdog name, and the
 * "interval", "timeout" and "safe-exit" options are read from that
 * section before the call.  The last argument to wdt_add() is always 0.
 *
 * @ctx:  event loop context, not used by this function
 * @cfg:  parsed configuration holding the sections
 * @sect: name of the section type to iterate over, e.g. "device"
 *
 * Returns the sum of all wdt_add() return values; presumably zero
 * means every device was added successfully -- verify against wdt_add().
 */
static int wdt_checker(uev_ctx_t *ctx, cfg_t *cfg, const char *sect)
{
	unsigned int idx = 0;
	int result = 0;

	while (idx < cfg_size(cfg, sect)) {
		cfg_t *dev = cfg_getnsec(cfg, sect, idx);
		const char *devname = cfg_title(dev);
		int kick_interval = cfg_getint(dev, "interval");
		int wdt_timeout = cfg_getint(dev, "timeout");
		int safe_exit = cfg_getbool(dev, "safe-exit");

		result += wdt_add(devname, kick_interval, wdt_timeout, safe_exit, 0);
		idx++;
	}

	return result;
}

static int validate_reset_reason(uev_ctx_t *ctx, cfg_t *cfg)
{
if (!cfg)
Expand Down Expand Up @@ -189,6 +209,12 @@ static void conf_errfunc(cfg_t *cfg, const char *format, va_list args)

int conf_parse_file(uev_ctx_t *ctx, char *file)
{
cfg_opt_t device_opts[] = {
CFG_INT ("interval", WDT_KICK_DEFAULT, CFGF_NONE),
CFG_INT ("timeout", WDT_TIMEOUT_DEFAULT, CFGF_NONE),
CFG_BOOL("safe-exit", cfg_false, CFGF_NONE),
CFG_END()
};
cfg_opt_t supervisor_opts[] = {
CFG_BOOL("enabled", cfg_false, CFGF_NONE),
CFG_INT ("priority", 0, CFGF_NONE),
Expand Down Expand Up @@ -224,6 +250,7 @@ int conf_parse_file(uev_ctx_t *ctx, char *file)
CFG_INT ("interval", WDT_KICK_DEFAULT, CFGF_NONE),
CFG_INT ("timeout", WDT_TIMEOUT_DEFAULT, CFGF_NONE),
CFG_BOOL("safe-exit", cfg_false, CFGF_NONE),
CFG_SEC ("device", device_opts, CFGF_MULTI | CFGF_TITLE),
CFG_SEC ("supervisor", supervisor_opts, CFGF_NONE),
CFG_SEC ("reset-cause", reset_reason_opts, CFGF_NONE), /* Compat only */
CFG_SEC ("reset-reason", reset_reason_opts, CFGF_NONE),
Expand Down Expand Up @@ -294,6 +321,10 @@ int conf_parse_file(uev_ctx_t *ctx, char *file)
opt = cfg_getnsec(cfg, "reset-cause", 0); /* Compat only */
validate_reset_reason(ctx, opt);

wdt_mark();
wdt_checker(ctx, cfg, "device");
wdt_sweep();

#ifdef FILENR_PLUGIN
checker(ctx, cfg, "filenr", filenr_init);
#endif
Expand Down
6 changes: 3 additions & 3 deletions src/watchdogd.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ static void exit_cb(uev_t *w, void *arg, int events)
{
DEBUG("Got signal %d, rebooting:%d ...", w->signo, rebooting);
if (rebooting) {
wdt_exit(w->ctx);
wdt_reboot(w->ctx);
return;
}

wdt_close(w->ctx);
wdt_exit(w->ctx);
}

static void reboot_cb(uev_t *w, void *arg, int events)
Expand All @@ -75,7 +75,7 @@ static void reboot_cb(uev_t *w, void *arg, int events)

DEBUG("Got signal %d, rebooting:%d ...", w->signo, rebooting);
if (rebooting) {
wdt_exit(w->ctx);
wdt_reboot(w->ctx);
return;
}

Expand Down
Loading

0 comments on commit c0752d9

Please sign in to comment.