Skip to content

Commit

Permalink
Merge branch 'master' into mc_trigger
Browse files Browse the repository at this point in the history
Signed-off-by: winterddd <[email protected]>
  • Loading branch information
winterddd authored Jan 22, 2024
2 parents 28636a3 + 0e82389 commit 8cd607b
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 37 deletions.
12 changes: 12 additions & 0 deletions mce-amd-smca.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,18 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m)
channel, csrow);
}


if (e->vdata_len) {
uint64_t smca_config = e->vdata[2];

/*
* BIT 9 of the CONFIG register of a few SMCA Bank types indicates
* presence of FRU Text in SYND 1 / 2 registers
*/
if (smca_config & BIT(9))
memcpy(e->frutext, e->vdata, 16);
}

}

int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e)
Expand Down
5 changes: 0 additions & 5 deletions non-standard-ampere.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,35 +676,30 @@ static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder,
enum amp_oem_data_type data_type,
int id, int64_t data, const char *text)
{
return 0;
}

/*
 * No-op variant of record_amp_payload0_err(): accepts the same arguments
 * but records nothing.
 *
 * NOTE(review): presumably the stub used when database recording is not
 * compiled in - confirm against the surrounding preprocessor conditionals.
 *
 * The function is declared void, so it must not return a value.
 */
static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder,
				    const char *type_str, const char *subtype_str,
				    const struct amp_payload0_type_sec *err)
{
}

/*
 * No-op variant of record_amp_payload1_err(): accepts the same arguments
 * but records nothing.
 *
 * NOTE(review): presumably the stub used when database recording is not
 * compiled in - confirm against the surrounding preprocessor conditionals.
 *
 * The function is declared void, so it must not return a value.
 */
static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder,
				    const char *type_str, const char *subtype_str,
				    const struct amp_payload1_type_sec *err)
{
}

/*
 * No-op variant of record_amp_payload2_err(): accepts the same arguments
 * but records nothing.
 *
 * NOTE(review): presumably the stub used when database recording is not
 * compiled in - confirm against the surrounding preprocessor conditionals.
 *
 * The function is declared void, so it must not return a value.
 */
static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder,
				    const char *type_str, const char *subtype_str,
				    const struct amp_payload2_type_sec *err)
{
}

/*
 * No-op variant of record_amp_payload3_err(): accepts the same arguments
 * but records nothing.
 *
 * NOTE(review): presumably the stub used when database recording is not
 * compiled in - confirm against the surrounding preprocessor conditionals.
 *
 * The function is declared void, so it must not return a value.
 */
static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder,
				    const char *type_str, const char *subtype_str,
				    const struct amp_payload3_type_sec *err)
{
}

static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, char *name)
Expand Down
27 changes: 17 additions & 10 deletions non-standard-yitian.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,16 +164,6 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,
const char *subtype_str = oem_subtype_name(yitian_payload_error_type,
header->type, header->subtype);

#ifdef HAVE_SQLITE3
if (ras->record_events && !ev_decoder->stmt_dec_record) {
if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record,
&yitian_ddr_payload_section_tab) != SQLITE_OK) {
trace_seq_printf(s, "create sql fail\n");
return;
}
}
#endif

now = time(NULL);
tm = localtime(&now);
if (tm)
Expand Down Expand Up @@ -217,6 +207,22 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,

}

/*
 * Register the yitian_ddr_payload_section_tab vendor table for a decoder.
 *
 * When built with HAVE_SQLITE3, the table is added through
 * ras_mc_add_vendor_table() only if ras->record_events is set and the
 * decoder's stmt_dec_record is still unset. Without HAVE_SQLITE3 the
 * function does nothing.
 *
 * Returns 0 on success or when there is nothing to do, -1 if adding the
 * table fails (a warning is logged in that case).
 */
static int add_yitian_common_table(struct ras_events *ras,
				   struct ras_ns_ev_decoder *ev_decoder)
{
#ifdef HAVE_SQLITE3
	int rc;

	/* Recording is off, or the statement already exists: nothing to add. */
	if (!ras->record_events || ev_decoder->stmt_dec_record)
		return 0;

	rc = ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record,
				     &yitian_ddr_payload_section_tab);
	if (rc != SQLITE_OK) {
		log(TERM, LOG_WARNING,
		    "Failed to create sql yitian_ddr_payload_section_tab\n");
		return -1;
	}
#endif
	return 0;
}

/* error data decoding functions */
static int decode_yitian710_ns_error(struct ras_events *ras,
struct ras_ns_ev_decoder *ev_decoder,
Expand All @@ -239,6 +245,7 @@ static int decode_yitian710_ns_error(struct ras_events *ras,
/*
 * Non-standard event decoder table for Yitian. Each entry ties a section
 * type GUID string (.sec_type) to the callback that sets up its table
 * (.add_table) and the callback that decodes the error data (.decode).
 */
struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = {
{
.sec_type = "a6980811-16ea-4e4d-b936-fb00a23ff29c",
.add_table = add_yitian_common_table,
.decode = decode_yitian710_ns_error,
},
};
Expand Down
54 changes: 37 additions & 17 deletions ras-events.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#endif


extern char* choices_disable;
/*
 * Table pairing an event name with the address of its trigger setup
 * routine. The storage-class specifier is placed first, as C11 6.11.5
 * marks any other placement as obsolescent.
 */
static const struct event_trigger event_triggers[] = {
	{ "mc_event", &mc_event_trigger_setup },
};
Expand Down Expand Up @@ -156,6 +157,18 @@ static int get_tracing_dir(struct ras_events *ras)
return 0;
}

/*
 * Tell whether the "group:event" name occurs in the choices_disable string.
 *
 * Returns 1 when choices_disable is a non-empty string that contains
 * "group:event" as a substring, 0 otherwise (including when
 * choices_disable is NULL or empty).
 */
static int is_disabled_event(char *group, char *event)
{
	char full_name[MAX_PATH + 1];

	snprintf(full_name, sizeof(full_name), "%s:%s", group, event);

	/* No disable list at all: no event can be disabled. */
	if (choices_disable == NULL || choices_disable[0] == '\0')
		return 0;

	return strstr(choices_disable, full_name) != NULL;
}

/*
* Tracing enable/disable code
*/
Expand All @@ -164,6 +177,7 @@ static int __toggle_ras_mc_event(struct ras_events *ras,
{
int fd, rc;
char fname[MAX_PATH + 1];
enable = is_disabled_event(group, event) ? 0 : 1;

snprintf(fname, sizeof(fname), "%s%s:%s\n",
enable ? "" : "!",
Expand Down Expand Up @@ -855,6 +869,12 @@ static int add_event_handler(struct ras_events *ras, struct tep_handle *pevent,

ras->filters[id] = filter;

if (is_disabled_event(group, event)) {
log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n",
group, event);
return -EINVAL;
}

/* Enable RAS events */
rc = __toggle_ras_mc_event(ras, group, event, 1);
free(page);
Expand Down Expand Up @@ -924,7 +944,7 @@ int handle_ras_events(int record_events)
ras_mc_event_handler, NULL, MC_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "mc_event");

Expand All @@ -933,7 +953,7 @@ int handle_ras_events(int record_events)
ras_aer_event_handler, NULL, AER_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "aer_event");
#endif
Expand All @@ -943,7 +963,7 @@ int handle_ras_events(int record_events)
ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "non_standard_event");
#endif
Expand All @@ -953,7 +973,7 @@ int handle_ras_events(int record_events)
ras_arm_event_handler, NULL, ARM_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "arm_event");
#endif
Expand Down Expand Up @@ -987,7 +1007,7 @@ int handle_ras_events(int record_events)
/* tell kernel we are listening, so don't printk to console */
(void)open("/sys/kernel/debug/ras/daemon_active", 0);
num_events++;
} else
} else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "extlog_mem_event");
#endif
Expand All @@ -1004,7 +1024,7 @@ int handle_ras_events(int record_events)
ras_devlink_event_handler, filter_str, DEVLINK_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"devlink", "devlink_health_report");
#endif
Expand All @@ -1016,7 +1036,7 @@ int handle_ras_events(int record_events)
NULL, DISKERROR_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"block", "block_rq_error");
#else
Expand All @@ -1027,7 +1047,7 @@ int handle_ras_events(int record_events)
NULL, DISKERROR_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"block", "block_rq_complete");
}
Expand All @@ -1039,7 +1059,7 @@ int handle_ras_events(int record_events)
ras_memory_failure_event_handler, NULL, MF_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"ras", "memory_failure_event");
#endif
Expand All @@ -1049,63 +1069,63 @@ int handle_ras_events(int record_events)
ras_cxl_poison_event_handler, NULL, CXL_POISON_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_poison");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_aer_uncorrectable_error",
ras_cxl_aer_ue_event_handler, NULL, CXL_AER_UE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_aer_uncorrectable_error");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_aer_correctable_error",
ras_cxl_aer_ce_event_handler, NULL, CXL_AER_CE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_aer_correctable_error");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_overflow",
ras_cxl_overflow_event_handler, NULL, CXL_OVERFLOW_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_overflow");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_generic_event",
ras_cxl_generic_event_handler, NULL, CXL_GENERIC_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_generic_event");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_general_media",
ras_cxl_general_media_event_handler, NULL, CXL_GENERAL_MEDIA_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_general_media");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_dram",
ras_cxl_dram_event_handler, NULL, CXL_DRAM_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "cxl_dram");

rc = add_event_handler(ras, pevent, page_size, "cxl", "cxl_memory_module",
ras_cxl_memory_module_event_handler, NULL, CXL_MEMORY_MODULE_EVENT);
if (!rc)
num_events++;
else
else if (rc != -EINVAL)
log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
"cxl", "memory_module");
#endif
Expand Down
21 changes: 21 additions & 0 deletions ras-mce-handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,24 @@ static void report_mce_event(struct ras_events *ras,

trace_seq_printf(s, ", apicid= %x", e->apicid);

if (!e->vdata_len)
return;

if (strlen(e->frutext)) {
trace_seq_printf(s, ", FRU Text= %s", e->frutext);
trace_seq_printf(s, ", Vendor Data= ");
for (int i = 2; i < e->vdata_len/8; i++) {
trace_seq_printf(s, "0x%lx", e->vdata[i]);
trace_seq_printf(s, " ");
}
} else {
trace_seq_printf(s, ", Vendor Data= ");
for (int i = 0; i < e->vdata_len/8; i ++) {
trace_seq_printf(s, "0x%lx", e->vdata[i]);
trace_seq_printf(s, " ");
}
}

/*
* FIXME: The original mcelog userspace tool uses DMI to map from
* address to DIMM. From the comments there, the code there doesn't
Expand Down Expand Up @@ -548,6 +566,9 @@ int ras_mce_event_handler(struct trace_seq *s,
return -1;
e.ipid = val;

/* Get Vendor-specific Data, if any */
e.vdata = tep_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1);

switch (mce->cputype) {
case CPU_GENERIC:
break;
Expand Down
3 changes: 3 additions & 0 deletions ras-mce-handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,11 @@ struct mce_event {
uint8_t cpuvendor;
uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */
uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */
int32_t vdata_len;
const uint64_t *vdata;

/* Parsed data */
char frutext[17];
char timestamp[64];
char bank_name[64];
char error_msg[4096];
Expand Down
10 changes: 5 additions & 5 deletions ras-page-isolation.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,18 @@ static void parse_isolation_env(struct isolation *config)
config->unit = no_unit ? config->unit : "";
}

static void parse_env_string(struct isolation *config, char *str)
static void parse_env_string(struct isolation *config, char *str, unsigned int size)
{
int i;

if (config->overflow) {
/* when overflow, use basic unit */
for (i = 0; config->units[i].name; i++) ;
sprintf(str, "%lu%s", config->val, config->units[i-1].name);
snprintf(str, size, "%lu%s", config->val, config->units[i-1].name);
log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n",
config->name, config->env);
} else {
sprintf(str, "%s%s", config->env, config->unit);
snprintf(str, size, "%s%s", config->env, config->unit);
}
}

Expand All @@ -199,8 +199,8 @@ static void page_isolation_init(void)

parse_isolation_env(&threshold);
parse_isolation_env(&cycle);
parse_env_string(&threshold, threshold_string);
parse_env_string(&cycle, cycle_string);
parse_env_string(&threshold, threshold_string, sizeof(threshold_string));
parse_env_string(&cycle, cycle_string, sizeof(cycle_string));
log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n",
threshold_string, cycle_string);
}
Expand Down
Loading

0 comments on commit 8cd607b

Please sign in to comment.