From: Shiju Jose <shiju.jose@huawei.com>
CXL spec 3.1 section 8.2.9.2.1.2 Table 8-46, DRAM Event Record, has been
updated with the following new fields and new types for the Memory Event Type,
Transaction Type and Validity Flags fields.
1. Component Identifier
2. Sub-channel
3. Advanced Programmable Corrected Memory Error Threshold Event Flags
4. Corrected Memory Error Count at Event
5. Memory Event Sub-Type
Add updates for the above spec changes in the CXL events record and CXL
DRAM trace event implementations.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
drivers/cxl/core/trace.h | 44 ++++++++++++++++++++++++++++++++--------
include/cxl/event.h | 7 ++++++-
2 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index e638e82429bc..20790dffa2b4 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -468,7 +468,7 @@ TRACE_EVENT(cxl_general_media,
/*
* DRAM Event Record - DER
*
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46
*/
/*
* DRAM Event Record defines many fields the same as the General Media Event
@@ -478,11 +478,17 @@ TRACE_EVENT(cxl_general_media,
#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01
#define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02
#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03
-#define show_dram_mem_event_type(type) __print_symbolic(type, \
+#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04
+#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05
+#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06
+#define show_dram_mem_event_type(type) __print_symbolic(type, \
{ CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
{ CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \
{ CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
- { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
+ { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \
+ { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \
+ { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \
+ { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \
)
#define CXL_DER_VALID_CHANNEL BIT(0)
@@ -493,7 +499,10 @@ TRACE_EVENT(cxl_general_media,
#define CXL_DER_VALID_ROW BIT(5)
#define CXL_DER_VALID_COLUMN BIT(6)
#define CXL_DER_VALID_CORRECTION_MASK BIT(7)
-#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
+#define CXL_DER_VALID_COMPONENT BIT(8)
+#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9)
+#define CXL_DER_VALID_SUB_CHANNEL BIT(10)
+#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
{ CXL_DER_VALID_CHANNEL, "CHANNEL" }, \
{ CXL_DER_VALID_RANK, "RANK" }, \
{ CXL_DER_VALID_NIBBLE, "NIBBLE" }, \
@@ -501,7 +510,9 @@ TRACE_EVENT(cxl_general_media,
{ CXL_DER_VALID_BANK, "BANK" }, \
{ CXL_DER_VALID_ROW, "ROW" }, \
{ CXL_DER_VALID_COLUMN, "COLUMN" }, \
- { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \
+ { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \
+ { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \
+ { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \
)
TRACE_EVENT(cxl_dram,
@@ -530,6 +541,11 @@ TRACE_EVENT(cxl_dram,
__field(u8, bank_group) /* Out of order to pack trace record */
__field(u8, bank) /* Out of order to pack trace record */
__field(u8, dpa_flags) /* Out of order to pack trace record */
+ __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+ __field(u32, cvme_count)
+ __field(u8, sub_channel)
+ __field(u8, cme_threshold_ev_flags)
+ __field(u8, sub_type)
__string(region_name, cxlr ? dev_name(&cxlr->dev) : "")
),
@@ -554,7 +570,13 @@ TRACE_EVENT(cxl_dram,
__entry->column = get_unaligned_le16(rec->column);
memcpy(__entry->cor_mask, &rec->correction_mask,
CXL_EVENT_DER_CORRECTION_MASK_SIZE);
+ memcpy(__entry->comp_id, &rec->component_id,
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE);
__entry->hpa = hpa;
+ __entry->sub_channel = rec->sub_channel;
+ __entry->cme_threshold_ev_flags = rec->cme_threshold_ev_flags;
+ __entry->cvme_count = get_unaligned_le24(rec->cvme_count);
+ __entry->sub_type = rec->sub_type;
if (cxlr) {
__assign_str(region_name);
uuid_copy(&__entry->region_uuid, &cxlr->params.uuid);
@@ -567,8 +589,9 @@ TRACE_EVENT(cxl_dram,
CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
"transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
"bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
- "validity_flags='%s' " \
- "hpa=%llx region=%s region_uuid=%pUb",
+ "comp_id=%s validity_flags='%s' " \
+ "hpa=%llx sub_channel=%u cme_threshold_ev_flags='%s' " \
+ "cvme_count=%x sub_type='%s' region=%s region_uuid=%pUb",
__entry->dpa, show_dpa_flags(__entry->dpa_flags),
show_event_desc_flags(__entry->descriptor),
show_dram_mem_event_type(__entry->type),
@@ -577,8 +600,13 @@ TRACE_EVENT(cxl_dram,
__entry->bank_group, __entry->bank,
__entry->row, __entry->column,
__print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
+ cxl_print_component_id(__entry->validity_flags, CXL_DER_VALID_COMPONENT,
+ CXL_DER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
show_dram_valid_flags(__entry->validity_flags),
- __entry->hpa, __get_str(region_name), &__entry->region_uuid
+ __entry->hpa, __entry->sub_channel,
+ show_cme_threshold_ev_flags(__entry->cme_threshold_ev_flags),
+ __entry->cvme_count, show_mem_event_sub_type(__entry->sub_type),
+ __get_str(region_name), &__entry->region_uuid
)
);
diff --git a/include/cxl/event.h b/include/cxl/event.h
index ea8cd44a52e9..7e98492c85df 100644
--- a/include/cxl/event.h
+++ b/include/cxl/event.h
@@ -71,7 +71,12 @@ struct cxl_event_dram {
u8 row[3];
u8 column[2];
u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
- u8 reserved[0x17];
+ u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ u8 sub_channel;
+ u8 cme_threshold_ev_flags;
+ u8 cvme_count[3];
+ u8 sub_type;
+ u8 reserved;
} __packed;
/*
--
2.34.1
On Wed, 16 Oct 2024 17:33:48 +0100 <shiju.jose@huawei.com> wrote: > From: Shiju Jose <shiju.jose@huawei.com> > > CXL spec 3.1 section 8.2.9.2.1.2 Table 8-46, DRAM Event Record has updated > with following new fields and new types for Memory Event Type, Transaction > Type and Validity Flags fields. > 1. Component Identifier > 2. Sub-channel > 3. Advanced Programmable Corrected Memory Error Threshold Event Flags > 4. Corrected Memory Error Count at Event > 5. Memory Event Sub-Type > > Add updates for the above spec changes in the CXL events record and CXL > DRAM trace event implementations. > > Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Passing comments on two things inline. 1) There are a couple of whitespace consistency changes in here. Spaces to tabs for alignment. That's fine but maybe needs a brief mention in the patch description. 2) Really odd that the spec didn't have a component ID field for DRAM errors. They weren't all that useful before the PLDM format was added but still a curiosity that made me open up the 3.0 spec. Indeed, no such field. With that one line added to the patch description this looks good to me. 
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > --- > drivers/cxl/core/trace.h | 44 ++++++++++++++++++++++++++++++++-------- > include/cxl/event.h | 7 ++++++- > 2 files changed, 42 insertions(+), 9 deletions(-) > > diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h > index e638e82429bc..20790dffa2b4 100644 > --- a/drivers/cxl/core/trace.h > +++ b/drivers/cxl/core/trace.h > @@ -468,7 +468,7 @@ TRACE_EVENT(cxl_general_media, > /* > * DRAM Event Record - DER > * > - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 > + * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46 > */ > /* > * DRAM Event Record defines many fields the same as the General Media Event > @@ -478,11 +478,17 @@ TRACE_EVENT(cxl_general_media, > #define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR 0x01 > #define CXL_DER_MEM_EVT_TYPE_INV_ADDR 0x02 > #define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x03 > -#define show_dram_mem_event_type(type) __print_symbolic(type, \ > +#define CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION 0x04 > +#define CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE 0x05 > +#define CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION 0x06 > +#define show_dram_mem_event_type(type) __print_symbolic(type, \ This change looks odd here but does bring the line above into the same formatting style as the other similar cases in the file. Maybe worth a line in the patch description to say "Includes trivial consistency of white space improvements" just to flag up that it was intentional. 
> { CXL_DER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \ > { CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR, "Scrub Media ECC Error" }, \ > { CXL_DER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \ > - { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \ > + { CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" }, \ > + { CXL_DER_MEM_EVT_TYPE_TE_STATE_VIOLATION, "TE State Violation" }, \ > + { CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE, "Adv Prog CME Counter Expiration" }, \ > + { CXL_DER_MEM_EVT_TYPE_CKID_VIOLATION, "CKID Violation" } \ > ) > > #define CXL_DER_VALID_CHANNEL BIT(0) > @@ -493,7 +499,10 @@ TRACE_EVENT(cxl_general_media, > #define CXL_DER_VALID_ROW BIT(5) > #define CXL_DER_VALID_COLUMN BIT(6) > #define CXL_DER_VALID_CORRECTION_MASK BIT(7) > -#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ > +#define CXL_DER_VALID_COMPONENT BIT(8) > +#define CXL_DER_VALID_COMPONENT_ID_FORMAT BIT(9) > +#define CXL_DER_VALID_SUB_CHANNEL BIT(10) > +#define show_dram_valid_flags(flags) __print_flags(flags, "|", \ As above this is a minor white space consistency change. 
> { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \ > { CXL_DER_VALID_RANK, "RANK" }, \ > { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \ > @@ -501,7 +510,9 @@ TRACE_EVENT(cxl_general_media, > { CXL_DER_VALID_BANK, "BANK" }, \ > { CXL_DER_VALID_ROW, "ROW" }, \ > { CXL_DER_VALID_COLUMN, "COLUMN" }, \ > - { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \ > + { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" }, \ > + { CXL_DER_VALID_COMPONENT, "COMPONENT" }, \ > + { CXL_DER_VALID_SUB_CHANNEL, "SUB CHANNEL" } \ > ) > diff --git a/include/cxl/event.h b/include/cxl/event.h > index ea8cd44a52e9..7e98492c85df 100644 > --- a/include/cxl/event.h > +++ b/include/cxl/event.h > @@ -71,7 +71,12 @@ struct cxl_event_dram { > u8 row[3]; > u8 column[2]; > u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; > - u8 reserved[0x17]; > + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; Odd that the general media had this field in 3.0 but DRAM didn't. I checked though and indeed the case! > + u8 sub_channel; > + u8 cme_threshold_ev_flags; > + u8 cvme_count[3]; > + u8 sub_type; > + u8 reserved; > } __packed; > > /*
© 2016 - 2024 Red Hat, Inc.