Skip to content

Commit

Permalink
refactor(healthmon): monitors are design specific
Browse files Browse the repository at this point in the history
Ensure that health monitoring monitor arrays are board/design specific

Signed-off-by: Ivan Griffin <[email protected]>
  • Loading branch information
Ivan Griffin committed Oct 1, 2024
1 parent b22d5bb commit 4053ccf
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 82 deletions.
4 changes: 4 additions & 0 deletions boards/mpfs-icicle-kit-es/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ EXTRA_SRCS-y += \
EXTRA_SRCS-$(CONFIG_USE_LOGO) += \
init/hss_logo_init.c

EXTRA_SRCS-$(CONFIG_SERVICE_HEALTHMON) += \
$(BOARD_DIR)/healthmon_monitors.c \


$(BOARD_DIR)/hss_uart_init.o: CFLAGS=$(CFLAGS_GCCEXT)

ifndef CONFIG_SERVICE_BOOT_USE_PAYLOAD_IN_FABRIC
Expand Down
72 changes: 72 additions & 0 deletions boards/mpfs-icicle-kit-es/healthmon_monitors.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*******************************************************************************
* Copyright 2019-2023 Microchip FPGA Embedded Systems Solutions.
*
* SPDX-License-Identifier: MIT
*
* MPFS HSS Embedded Software
*
*/

/*!
* \file Health Monitor board-specific monitor definitions
* \brief E51-Assisted Health Monitor: monitors, status array and array size for this board/design
*/

#include "config.h"
#include "hss_types.h"
#include "healthmon_service.h"

/*
 * Board-specific table of registers checked by the HealthMon service.
 *
 * Each row is a positional initializer for struct HealthMonitor, in field order:
 *   pName, pAddr, checkType, maxValue, minValue, shift, mask,
 *   triggerCallback, throttleScale
 * Every row currently uses healthmon_nop_trigger as its callback.
 */
const struct HealthMonitor monitors[] =
{
{ "IOSCB_PLL_MSS:PLL_CTRL", 0x3E001004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 1u },
{ "IOSCB_PLL_DDR:PLL_CTRL", 0x3E010004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 1u },
// NOTE(review): the next row uses the same address as IOSCB_PLL_MSS above (0x3E001004);
// possible copy/paste slip -- confirm the IOSCB_PLL_SGMII address against the register map
{ "IOSCB_PLL_SGMII:PLL_CTRL", 0x3E001004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 1u },
{ "IOSCB_PLL:pll_nw_0:PLL_CTRL", 0x38100004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 1u },
// Disabled NOT_EQUAL_TO_VALUE variants; the same registers are listed further down with CHANGED_SINCE_LAST
//{ "IOSCB_PLL:pll_se_0:PLL_CTRL", 0x38010004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_se_1:PLL_CTRL", 0x38020004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_ne_0:PLL_CTRL", 0x38040004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_ne_1:PLL_CTRL", 0x38080004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_nw_1:PLL_CTRL", 0x38200004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_sw_0:PLL_CTRL", 0x38400004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
//{ "IOSCB_PLL:pll_sw_1:PLL_CTRL", 0x38800004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },

{ "SYSREG:BOOT_FAIL_CR", 0x20002014, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 1u, healthmon_nop_trigger, 5u },
{ "SYSREG:DEVICE_STATUS", 0x20002024, NOT_EQUAL_TO_VALUE, 0x1F09u, 0u, 0u, 0x1FFF, healthmon_nop_trigger, 5u },
{ "SYSREG:MPU_VIOLATION_SR", 0x200020F0, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 1u, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_SR", 0x20002100, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 0x3FFF, healthmon_nop_trigger, 5u },

{ "SYSREG:EDAC_CNT_MMC", 0x20002108, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_DDRC", 0x2000210C, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_MAC0", 0x20002110, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_MAC1", 0x20002114, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_USB", 0x20002118, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_CAN0", 0x2000211c, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_CAN1", 0x20002120, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, healthmon_nop_trigger, 5u },

{ "SYSREG:MAINTENANCE_INT_SR", 0x20002148, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1FFFFF, healthmon_nop_trigger, 5u },// [20:0] == some cleared by writing 1, some by writing to PLL_STATUS
{ "SYSREG:PLL_STATUS_SR", 0x2000214c, NOT_EQUAL_TO_VALUE, 0x707u, 0u, 0u, 0x7FF, healthmon_nop_trigger, 5u },
{ "SYSREG:MISC_SR", 0x20002154, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 2u, healthmon_nop_trigger, 5u },
{ "SYSREG:DLL_STATUS_SR", 0x2000215c, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1FFFFF, healthmon_nop_trigger, 5u },

{ "IOSCBCFG:STATUS", 0x37080004, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 0xEu, healthmon_nop_trigger, 5u },// [3:1] => scb_buserr, timeout, scb_error

// unknown what the following should be...
{ "IOSCB_PLL:pll_se_0:PLL_CTRL", 0x38010004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_se_1:PLL_CTRL", 0x38020004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_ne_0:PLL_CTRL", 0x38040004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_ne_1:PLL_CTRL", 0x38080004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_nw_1:PLL_CTRL", 0x38200004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_sw_0:PLL_CTRL", 0x38400004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },
{ "IOSCB_PLL:pll_sw_1:PLL_CTRL", 0x38800004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, healthmon_nop_trigger, 5u },

// NOTE(review): the service reads each monitored register through a 32-bit volatile pointer,
// so only the low 32 bits of these 64-bit masks can take effect -- confirm whether a wider read was intended
{ "L2:Config:ECCDirFixCount", 0x02010108, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, healthmon_nop_trigger, 1u },
{ "L2:Config:ECCDataFixCount", 0x02010148, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, healthmon_nop_trigger, 1u },
{ "L2:Config:ECCDataFailCount", 0x02010168, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, healthmon_nop_trigger, 1u },
};

/*
 * Per-monitor runtime state, one slot for every entry of monitors[].
 * Only slot 0 is written out; the remaining slots are implicitly
 * zero-initialized, as for any partially-initialized array.
 */
struct HealthMonitor_Status monitor_status[ARRAY_SIZE(monitors)] =
{
    [0] = {
        .throttle_startTime = 0u,
        .lastValue = 0u,
        .count = 0u,
        .initialized = false,
    },
};

// Number of entries in monitors[]; the service declares this extern and uses it as its loop bound,
// since it cannot apply ARRAY_SIZE to an array defined in another translation unit
const size_t monitors_array_size = ARRAY_SIZE(monitors);
1 change: 1 addition & 0 deletions services/healthmon/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

SRCS-$(CONFIG_SERVICE_HEALTHMON) += \
services/healthmon/healthmon_service.c \
services/healthmon/healthmon_monitors_weak.c \

INCLUDES +=\
-I./services/healthmon \
14 changes: 14 additions & 0 deletions services/healthmon/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
The HealthMon service automatically checks an array of monitors for out-of-bounds
or exceptional value conditions on every iteration of the HSS superloop.

It relies on the following weakly-bound data structures:

const struct HealthMonitor monitors[];
struct HealthMonitor_Status monitor_status[];
const size_t monitors_array_size;

The intention is that the selected board subdirectory provides an implementation of these variables. If none is
provided, the weakly-bound versions ensure that the system still links, but no health monitoring is performed.

`monitors` is specific to each board/design; `monitor_status` and `monitors_array_size` are derived from `monitors`.
For an example, see `boards/mpfs-icicle-kit-es/healthmon_monitors.c`.
21 changes: 21 additions & 0 deletions services/healthmon/healthmon_monitors_weak.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*******************************************************************************
* Copyright 2019-2023 Microchip FPGA Embedded Systems Solutions.
*
* SPDX-License-Identifier: MIT
*
* MPFS HSS Embedded Software
*
*/

/*!
* \file Health Monitor weakly-bound default monitor definitions
* \brief E51-Assisted Health Monitor: empty fallbacks used when the board provides no monitors
*/

#include "config.h"
#include "hss_types.h"
#include "healthmon_service.h"

/*
 * Weakly-bound fallbacks for the three symbols the HealthMon service links against.
 * A board that supplies its own (non-weak) definitions overrides these; otherwise the
 * table is empty and monitors_array_size is ARRAY_SIZE of that empty table.
 *
 * NOTE(review): empty initializer lists and zero-length arrays are compiler extensions
 * before C23 -- presumably this relies on the GCC toolchain; confirm if portability matters.
 */
__attribute__((weak)) const struct HealthMonitor monitors[] = { };
__attribute__((weak)) struct HealthMonitor_Status monitor_status[ARRAY_SIZE(monitors)] = { };
__attribute__((weak)) const size_t monitors_array_size = ARRAY_SIZE(monitors);
90 changes: 8 additions & 82 deletions services/healthmon/healthmon_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,6 @@ struct StateMachine healthmon_service = {
.pInstanceData = NULL
};

/* Kind of check a monitor applies to the value it reads. */
enum CheckTypeEnum
{
    ABOVE_THRESHOLD          = 0,
    BELOW_THRESHOLD          = 1,
    ABOVE_OR_BELOW_THRESHOLD = 2,
    EQUAL_TO_VALUE           = 3,
    NOT_EQUAL_TO_VALUE       = 4,
    CHANGED_SINCE_LAST       = 5,
};

char const * const checkName[] = {
"above threshold",
"below threshold",
Expand All @@ -87,80 +77,15 @@ char const * const checkName[] = {
};


static void nop_trigger(uintptr_t pAddr)
/*
 * Trigger callback that deliberately does nothing.
 *
 * pAddr: address of the monitored register; ignored.
 */
void healthmon_nop_trigger(uintptr_t pAddr)
{
    (void)pAddr; /* intentionally unused */
}

/*
 * Description of one monitored register, together with the file-local table of monitors.
 * Each table row is a positional initializer, so the field order here must match the rows.
 */
static const struct HealthMonitor
{
char const * const pName;
uintptr_t pAddr;
enum CheckTypeEnum checkType;
uint32_t maxValue;
uint32_t minValue;
uint8_t shift; // shift applied first...
uint64_t mask; // then mask
void (*triggerCallback)(uintptr_t pAddr);
uint32_t throttleScale; // times 1sec, to throttle console messages
} monitors[] =
{
{ "IOSCB_PLL_MSS:PLL_CTRL", 0x3E001004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 1u },
{ "IOSCB_PLL_DDR:PLL_CTRL", 0x3E010004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 1u },
// NOTE(review): the next row uses the same address as IOSCB_PLL_MSS above (0x3E001004);
// possible copy/paste slip -- confirm the IOSCB_PLL_SGMII address against the register map
{ "IOSCB_PLL_SGMII:PLL_CTRL", 0x3E001004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 1u },
{ "IOSCB_PLL:pll_nw_0:PLL_CTRL", 0x38100004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 1u },
//{ "IOSCB_PLL:pll_se_0:PLL_CTRL", 0x38010004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_se_1:PLL_CTRL", 0x38020004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_ne_0:PLL_CTRL", 0x38040004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_ne_1:PLL_CTRL", 0x38080004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_nw_1:PLL_CTRL", 0x38200004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_sw_0:PLL_CTRL", 0x38400004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },
//{ "IOSCB_PLL:pll_sw_1:PLL_CTRL", 0x38800004, NOT_EQUAL_TO_VALUE, 1u, 0u, 25u, 1u, nop_trigger, 5u },

{ "SYSREG:BOOT_FAIL_CR", 0x20002014, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 1u, nop_trigger, 5u },
{ "SYSREG:DEVICE_STATUS", 0x20002024, NOT_EQUAL_TO_VALUE, 0x1F09u, 0u, 0u, 0x1FFF, nop_trigger, 5u },
{ "SYSREG:MPU_VIOLATION_SR", 0x200020F0, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 1u, nop_trigger, 5u },
{ "SYSREG:EDAC_SR", 0x20002100, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 0x3FFF, nop_trigger, 5u },

{ "SYSREG:EDAC_CNT_MMC", 0x20002108, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_DDRC", 0x2000210C, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_MAC0", 0x20002110, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_MAC1", 0x20002114, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_USB", 0x20002118, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_CAN0", 0x2000211c, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },
{ "SYSREG:EDAC_CNT_CAN1", 0x20002120, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1F, nop_trigger, 5u },

{ "SYSREG:MAINTENANCE_INT_SR", 0x20002148, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1FFFFF, nop_trigger, 5u },// [20:0] == some cleared by writing 1, some by writing to PLL_STATUS
{ "SYSREG:PLL_STATUS_SR", 0x2000214c, NOT_EQUAL_TO_VALUE, 0x707u, 0u, 0u, 0x7FF, nop_trigger, 5u },
{ "SYSREG:MISC_SR", 0x20002154, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 2u, nop_trigger, 5u },
{ "SYSREG:DLL_STATUS_SR", 0x2000215c, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0x1FFFFF, nop_trigger, 5u },

{ "IOSCBCFG:STATUS", 0x37080004, NOT_EQUAL_TO_VALUE, 0u, 0u, 0u, 0xEu, nop_trigger, 5u },// [3:1] => scb_buserr, timeout, scb_error

// unknown what the following should be...
{ "IOSCB_PLL:pll_se_0:PLL_CTRL", 0x38010004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_se_1:PLL_CTRL", 0x38020004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_ne_0:PLL_CTRL", 0x38040004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_ne_1:PLL_CTRL", 0x38080004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_nw_1:PLL_CTRL", 0x38200004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_sw_0:PLL_CTRL", 0x38400004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },
{ "IOSCB_PLL:pll_sw_1:PLL_CTRL", 0x38800004, CHANGED_SINCE_LAST, 0u, 0u, 25u, 1u, nop_trigger, 5u },

{ "L2:Config:ECCDirFixCount", 0x02010108, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, nop_trigger, 1u },
{ "L2:Config:ECCDataFixCount", 0x02010148, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, nop_trigger, 1u },
{ "L2:Config:ECCDataFailCount", 0x02010168, CHANGED_SINCE_LAST, 0u, 0u, 0u, 0xFFFFFFFFFFFFFFFFu, nop_trigger, 1u },
};
// Monitor table, its per-entry status array and the entry count are defined outside this file
// (by the board, or by the weakly-bound defaults); the arrays are incomplete types here,
// so monitors_array_size is used as the loop bound instead of ARRAY_SIZE
extern const struct HealthMonitor monitors[];
extern struct HealthMonitor_Status monitor_status[];
extern const size_t monitors_array_size;

/* Runtime bookkeeping kept for each entry of monitors[]. */
struct HealthMonitor_Status
{
    HSSTicks_t throttle_startTime;
    uint32_t lastValue;
    size_t count;
    bool initialized;
};

/*
 * One status slot per monitor. Only slot 0 is written out; the remaining
 * slots are implicitly zero-initialized.
 */
static struct HealthMonitor_Status monitor_status[ARRAY_SIZE(monitors)] =
{
    [0] = {
        .throttle_startTime = 0u,
        .lastValue = 0u,
        .count = 0u,
        .initialized = false,
    },
};
// --------------------------------------------------------------------------------------------------
// Handlers for each state in the state machine
//
Expand All @@ -179,10 +104,10 @@ static void healthmon_monitoring_handler(struct StateMachine * const pMyMachine)

// general health monitoring...
{
for (size_t i = 0u; i < ARRAY_SIZE(monitors); i++) {
for (size_t i = 0u; i < monitors_array_size; i++) {
if (HSS_Timer_IsElapsed(monitor_status[i].throttle_startTime, monitors[i].throttleScale * ONE_SEC)) {
uint32_t value = *(uint32_t volatile *)(monitors[i].pAddr);
enum CheckTypeEnum checkType = monitors[i].checkType;
enum HealthMon_CheckType checkType = monitors[i].checkType;
bool triggered = false;

if (monitors[i].shift) { value = value >> monitors[i].shift; }
Expand Down Expand Up @@ -261,8 +186,9 @@ static void healthmon_monitoring_handler(struct StateMachine * const pMyMachine)
/////////////////
void HSS_Health_DumpStats(void)
{
mHSS_DEBUG_PRINTF(LOG_NORMAL, "monitors_array_size: %d\n", monitors_array_size);
mHSS_DEBUG_PRINTF(LOG_NORMAL, "Health Monitoring Counts per trigger:\n");
for (size_t i = 0u; i < ARRAY_SIZE(monitors); i++) {
for (size_t i = 0u; i < monitors_array_size; i++) {
char tmp_buffer[80] = "\0";

switch (monitors[i].checkType) {
Expand Down
31 changes: 31 additions & 0 deletions services/healthmon/healthmon_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,38 @@ extern "C" {
extern struct StateMachine healthmon_service;

void HSS_Health_DumpStats(void);
void healthmon_nop_trigger(uintptr_t pAddr);

/* Kind of check a monitor applies to the value it reads. */
enum HealthMon_CheckType
{
    ABOVE_THRESHOLD          = 0,
    BELOW_THRESHOLD          = 1,
    ABOVE_OR_BELOW_THRESHOLD = 2,
    EQUAL_TO_VALUE           = 3,
    NOT_EQUAL_TO_VALUE       = 4,
    CHANGED_SINCE_LAST       = 5,
};

/*
 * Describes one monitored register.
 * Board tables initialize this struct positionally, so the field order is part of the interface.
 */
struct HealthMonitor
{
char const * const pName; // name string for this monitor
uintptr_t pAddr; // address the service reads through a 32-bit volatile pointer
enum HealthMon_CheckType checkType; // which check to apply to the value read
uint32_t maxValue; // NOTE(review): appears to double as the comparison value for the *_EQUAL_TO_VALUE checks -- confirm against the handler
uint32_t minValue;
uint8_t shift; // shift applied first...
uint64_t mask; // then mask
void (*triggerCallback)(uintptr_t pAddr); // NOTE(review): presumably called with pAddr when the check triggers -- confirm
uint32_t throttleScale; // times 1sec, to throttle console messages
};

/*
 * Mutable per-monitor state; monitor_status[i] pairs with monitors[i].
 * Board files initialize this struct positionally, so the field order is part of the interface.
 */
struct HealthMonitor_Status
{
HSSTicks_t throttle_startTime; // start time compared against throttleScale * ONE_SEC before the monitor is checked again
uint32_t lastValue; // NOTE(review): presumably the previous reading, for CHANGED_SINCE_LAST -- confirm
size_t count; // NOTE(review): presumably the per-monitor trigger count reported by HSS_Health_DumpStats -- confirm
bool initialized;
};
#ifdef __cplusplus
}
#endif
Expand Down

0 comments on commit 4053ccf

Please sign in to comment.