Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for kernel changes in CXL trace error events #178

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions ras-cxl-handler.c
Original file line number Diff line number Diff line change
Expand Up @@ -727,12 +727,12 @@ static const struct cxl_event_flags cxl_dpa_flags[] = {
* General Media Event Record - GMER
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
*/
/*
 * Bit masks for the GMER event descriptor; each one is paired with a
 * printable flag name in cxl_gmer_event_desc_flags.
 * NOTE(review): BIT(0) is listed under two spellings (UNCORECTABLE and
 * UNCORRECTABLE) - presumably the removed and added lines of this diff
 * view. Confirm that only the corrected spelling remains in the real file.
 */
#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
#define CXL_GMER_EVT_DESC_UNCORRECTABLE_EVENT BIT(0)
#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)

/*
 * Table pairing each GMER event descriptor bit (.bit) with the string
 * reported for it (.flag).
 * NOTE(review): the first two entries both use BIT(0) and the same string,
 * differing only in the macro spelling - presumably the old and new lines
 * of this diff view. If both were compiled in, the table would carry a
 * duplicate entry; confirm only one is present in the real file.
 */
static const struct cxl_event_flags cxl_gmer_event_desc_flags[] = {
{ .bit = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, .flag = "UNCORRECTABLE EVENT" },
{ .bit = CXL_GMER_EVT_DESC_UNCORRECTABLE_EVENT, .flag = "UNCORRECTABLE EVENT" },
{ .bit = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, .flag = "THRESHOLD EVENT" },
{ .bit = CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, .flag = "POISON LIST OVERFLOW" },
};
Expand Down Expand Up @@ -811,6 +811,26 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
ev.transaction_type)) <= 0)
return -1;

if (tep_get_field_val(s, event, "hpa", record, &val, 1) < 0)
return -1;
ev.hpa = val;
if (trace_seq_printf(s, "hpa:0x%llx ", (unsigned long long)ev.hpa) <= 0)
return -1;

ev.region = tep_get_field_raw(s, event, "region_name", record, &len, 1);
if (!ev.region)
return -1;
if (trace_seq_printf(s, "region:%s ", ev.region) <= 0)
return -1;

ev.region_uuid = tep_get_field_raw(s, event, "region_uuid",
record, &len, 1);
if (!ev.region_uuid)
return -1;
ev.region_uuid = uuid_be(ev.region_uuid);
if (trace_seq_printf(s, "region_uuid:%s ", ev.region_uuid) <= 0)
return -1;

if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0)
return -1;
ev.validity_flags = val;
Expand Down Expand Up @@ -933,6 +953,26 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
ev.transaction_type)) <= 0)
return -1;

if (tep_get_field_val(s, event, "hpa", record, &val, 1) < 0)
return -1;
ev.hpa = val;
if (trace_seq_printf(s, "hpa:0x%llx ", (unsigned long long)ev.hpa) <= 0)
return -1;

ev.region = tep_get_field_raw(s, event, "region", record, &len, 1);
if (!ev.region)
return -1;
if (trace_seq_printf(s, "region:%s ", ev.region) <= 0)
return -1;

ev.region_uuid = tep_get_field_raw(s, event, "region_uuid",
record, &len, 1);
if (!ev.region_uuid)
return -1;
ev.region_uuid = uuid_be(ev.region_uuid);
if (trace_seq_printf(s, "region_uuid:%s ", ev.region_uuid) <= 0)
return -1;

if (tep_get_field_val(s, event, "validity_flags", record, &val, 1) < 0)
return -1;
ev.validity_flags = val;
Expand Down
12 changes: 12 additions & 0 deletions ras-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,9 @@ static const struct db_fields cxl_general_media_event_fields[] = {
{ .name = "rank", .type = "INTEGER" },
{ .name = "device", .type = "INTEGER" },
{ .name = "comp_id", .type = "BLOB" },
{ .name = "hpa", .type = "INTEGER" },
{ .name = "region", .type = "TEXT" },
{ .name = "region_uuid", .type = "TEXT" },
};

static const struct db_table_descriptor cxl_general_media_event_tab = {
Expand Down Expand Up @@ -907,6 +910,9 @@ int ras_store_cxl_general_media_event(struct ras_events *ras,
sqlite3_bind_int(priv->stmt_cxl_general_media_event, 20, ev->device);
sqlite3_bind_blob(priv->stmt_cxl_general_media_event, 21, ev->comp_id,
CXL_EVENT_GEN_MED_COMP_ID_SIZE, NULL);
sqlite3_bind_int64(priv->stmt_cxl_general_media_event, 22, ev->hpa);
sqlite3_bind_text(priv->stmt_cxl_general_media_event, 23, ev->region, -1, NULL);
sqlite3_bind_text(priv->stmt_cxl_general_media_event, 24, ev->region_uuid, -1, NULL);

rc = sqlite3_step(priv->stmt_cxl_general_media_event);
if (rc != SQLITE_OK && rc != SQLITE_DONE)
Expand Down Expand Up @@ -951,6 +957,9 @@ static const struct db_fields cxl_dram_event_fields[] = {
{ .name = "row", .type = "INTEGER" },
{ .name = "column", .type = "INTEGER" },
{ .name = "cor_mask", .type = "BLOB" },
{ .name = "hpa", .type = "INTEGER" },
{ .name = "region", .type = "TEXT" },
{ .name = "region_uuid", .type = "TEXT" },
};

static const struct db_table_descriptor cxl_dram_event_tab = {
Expand Down Expand Up @@ -984,6 +993,9 @@ int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *
sqlite3_bind_int(priv->stmt_cxl_dram_event, 24, ev->column);
sqlite3_bind_blob(priv->stmt_cxl_dram_event, 25, ev->cor_mask,
CXL_EVENT_DER_CORRECTION_MASK_SIZE, NULL);
sqlite3_bind_int64(priv->stmt_cxl_dram_event, 26, ev->hpa);
sqlite3_bind_text(priv->stmt_cxl_dram_event, 27, ev->region, -1, NULL);
sqlite3_bind_text(priv->stmt_cxl_dram_event, 28, ev->region_uuid, -1, NULL);

rc = sqlite3_step(priv->stmt_cxl_dram_event);
if (rc != SQLITE_OK && rc != SQLITE_DONE)
Expand Down
6 changes: 6 additions & 0 deletions ras-record.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ struct ras_cxl_general_media_event {
uint32_t device;
uint8_t *comp_id;
uint16_t validity_flags;
uint64_t hpa;
const char *region;
const char *region_uuid;
};

struct ras_cxl_dram_event {
Expand All @@ -216,6 +219,9 @@ struct ras_cxl_dram_event {
uint16_t column;
uint8_t *cor_mask;
uint16_t validity_flags;
uint64_t hpa;
const char *region;
const char *region_uuid;
};

struct ras_cxl_memory_module_event {
Expand Down
12 changes: 12 additions & 0 deletions ras-report.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,9 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener
"descriptor=%u\n"
"type=%u\n"
"transaction_type=%u\n"
"hpa=0x%lx\n"
"region=%s\n"
"region_uuid=%s\n"
"channel=%u\n"
"rank=%u\n"
"device=0x%x\n",
Expand All @@ -574,6 +577,9 @@ static int set_cxl_general_media_event_backtrace(char *buf, struct ras_cxl_gener
ev->descriptor,
ev->type,
ev->transaction_type,
ev->hpa,
ev->region,
ev->region_uuid,
ev->channel,
ev->rank,
ev->device);
Expand Down Expand Up @@ -611,6 +617,9 @@ static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev
"descriptor=%u\n"
"type=%u\n"
"transaction_type=%u\n"
"hpa=0x%lx\n"
"region=%s\n"
"region_uuid=%s\n"
"channel=%u\n"
"rank=%u\n"
"nibble_mask=%u\n"
Expand All @@ -635,6 +644,9 @@ static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev
ev->descriptor,
ev->type,
ev->transaction_type,
ev->hpa,
ev->region,
ev->region_uuid,
ev->channel,
ev->rank,
ev->nibble_mask,
Expand Down
14 changes: 10 additions & 4 deletions util/ras-mc-ctl.in
Original file line number Diff line number Diff line change
Expand Up @@ -1957,10 +1957,10 @@ sub errors

# CXL general media errors
use constant CXL_EVENT_GEN_MED_COMP_ID_SIZE => 0x10;
$query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, rank, device, comp_id from cxl_general_media_event$conf{opt}{since} order by id";
$query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, rank, device, comp_id, hpa, region, region_uuid from cxl_general_media_event$conf{opt}{since} order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $mem_event_type, $transaction_type, $channel, $rank, $device, $comp_id));
$query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $mem_event_type, $transaction_type, $channel, $rank, $device, $comp_id, $hpa, $region, $region_uuid));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $timestamp error: ";
Expand Down Expand Up @@ -1990,6 +1990,9 @@ sub errors
$out .= sprintf "%02x ", $bytes[$i];
}
}
$out .= sprintf "hpa=0x%llx, ", $hpa if (defined $hpa && length $hpa);
$out .= "region=$region, " if (defined $region && length $region);
$out .= "region_uuid=$region_uuid, " if (defined $region_uuid && length $region_uuid);
$out .= "\n";
}
if ($out ne "") {
Expand All @@ -2000,10 +2003,10 @@ sub errors

# CXL DRAM errors
use constant CXL_EVENT_DER_CORRECTION_MASK_SIZE => 0x20;
$query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, rank, nibble_mask, bank_group, bank, row, column, cor_mask from cxl_dram_event$conf{opt}{since} order by id";
$query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, rank, nibble_mask, bank_group, bank, row, column, cor_mask, hpa, region, region_uuid from cxl_dram_event$conf{opt}{since} order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
$query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $type, $transaction_type, $channel, $rank, $nibble_mask, $bank_group, $bank, $row, $column, $cor_mask));
$query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $type, $transaction_type, $channel, $rank, $nibble_mask, $bank_group, $bank, $row, $column, $cor_mask, $hpa, $region, $region_uuid));
$out = "";
while($query_handle->fetch()) {
$out .= "$id $timestamp error: ";
Expand Down Expand Up @@ -2037,6 +2040,9 @@ sub errors
$out .= sprintf "%02x ", $bytes[$i];
}
}
$out .= sprintf "hpa=0x%llx, ", $hpa if (defined $hpa && length $hpa);
$out .= "region=$region, " if (defined $region && length $region);
$out .= "region_uuid=$region_uuid, " if (defined $region_uuid && length $region_uuid);
$out .= "\n";
}
if ($out ne "") {
Expand Down
Loading