[PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER events

Ira Weiny posted 3 patches 2 years, 1 month ago
There is a newer version of this series
[PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER events
Posted by Ira Weiny 2 years, 1 month ago
If the firmware has configured CXL event support to be firmware first
the OS can process those events through CPER records.  Matching memory
devices to the CPER records can be done via the serial number which is
part of the CPER record header.

Detect firmware first, register a notifier callback for each memdev, and
trace events when they match a device registered.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from RFC v1:
[iweiny: adjust to cper_event enum instead of converting guids]
---
 drivers/cxl/core/mbox.c | 45 +++++++++++++++++++++++++-------
 drivers/cxl/cxlmem.h    |  7 +++++
 drivers/cxl/pci.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 4df4f614f490..3f760d1d21de 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -860,26 +860,51 @@ static const uuid_t mem_mod_event_uuid =
 	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
 		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
 
-static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
-				   enum cxl_event_log_type type,
-				   struct cxl_event_record_raw *record)
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    struct cxl_event_record_raw *record,
+			    enum cxl_cper_event cper_event)
 {
-	uuid_t *id = &record->hdr.id;
-
-	if (uuid_equal(id, &gen_media_event_uuid)) {
+	switch (cper_event) {
+	case CXL_CPER_EVENT_GEN_MEDIA: {
 		struct cxl_event_gen_media *rec =
 				(struct cxl_event_gen_media *)record;
 
 		trace_cxl_general_media(cxlmd, type, rec);
-	} else if (uuid_equal(id, &dram_event_uuid)) {
+		break;
+		}
+	case CXL_CPER_EVENT_DRAM: {
 		struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
 
 		trace_cxl_dram(cxlmd, type, rec);
-	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
+		break;
+		}
+	case CXL_CPER_EVENT_MEM_MODULE: {
 		struct cxl_event_mem_module *rec =
 				(struct cxl_event_mem_module *)record;
 
 		trace_cxl_memory_module(cxlmd, type, rec);
+		break;
+		}
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
+
+static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+				     enum cxl_event_log_type type,
+				     struct cxl_event_record_raw *record)
+{
+	uuid_t *id = &record->hdr.id;
+
+	if (uuid_equal(id, &gen_media_event_uuid)) {
+		cxl_event_trace_record(cxlmd, type, record,
+				       CXL_CPER_EVENT_GEN_MEDIA);
+	} else if (uuid_equal(id, &dram_event_uuid)) {
+		cxl_event_trace_record(cxlmd, type, record,
+				       CXL_CPER_EVENT_DRAM);
+	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
+		cxl_event_trace_record(cxlmd, type, record,
+				       CXL_CPER_EVENT_MEM_MODULE);
 	} else {
 		/* For unknown record types print just the header */
 		trace_cxl_generic_event(cxlmd, type, record);
@@ -991,8 +1016,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
 			break;
 
 		for (i = 0; i < nr_rec; i++)
-			cxl_event_trace_record(cxlmd, type,
-					       &payload->records[i]);
+			__cxl_event_trace_record(cxlmd, type,
+						 &payload->records[i]);
 
 		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
 			trace_cxl_overflow(cxlmd, type, payload);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 706f8a6d1ef4..89bd85e7f51c 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -6,6 +6,7 @@
 #include <linux/cdev.h>
 #include <linux/uuid.h>
 #include <linux/rcuwait.h>
+#include <linux/efi.h>
 #include "cxl.h"
 
 /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -477,6 +478,8 @@ struct cxl_memdev_state {
 	struct cxl_security_state security;
 	struct cxl_fw_state fw;
 
+	struct notifier_block cxl_cper_nb;
+
 	struct rcuwait mbox_wait;
 	int (*mbox_send)(struct cxl_memdev_state *mds,
 			 struct cxl_mbox_cmd *cmd);
@@ -863,6 +866,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				  unsigned long *cmds);
 void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+			    enum cxl_event_log_type type,
+			    struct cxl_event_record_raw *record,
+			    enum cxl_cper_event cper_event);
 int cxl_set_timestamp(struct cxl_memdev_state *mds);
 int cxl_poison_state_init(struct cxl_memdev_state *mds);
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 44a21ab7add5..36d6f03e55de 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <asm-generic/unaligned.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/moduleparam.h>
 #include <linux/module.h>
@@ -10,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/io.h>
+#include <linux/efi.h>
 #include "cxlmem.h"
 #include "cxlpci.h"
 #include "cxl.h"
@@ -748,6 +750,69 @@ static bool cxl_event_int_is_fw(u8 setting)
 	return mode == CXL_INT_FW;
 }
 
+#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
+int cxl_cper_event_call(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct cxl_cper_notifier_data *nd = data;
+	struct cxl_event_record_raw record = (struct cxl_event_record_raw) {
+		.hdr.id = UUID_INIT(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+	};
+	enum cxl_event_log_type log_type;
+	struct cxl_memdev_state *mds;
+	u32 hdr_flags;
+
+	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
+
+	/* Need serial number for device identification */
+	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
+		return NOTIFY_DONE;
+
+	/* FIXME endianess and bytes of serial number need verification */
+	/* FIXME Should other values be checked? */
+	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
+		   sizeof(mds->cxlds.serial)))
+		return NOTIFY_DONE;
+
+	/* ensure record can always handle the full CPER provided data */
+	BUILD_BUG_ON(sizeof(record) <
+		(CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
+
+	/*
+	 * UEFI v2.10 defines N.2.14 defines the CXL CPER record as not
+	 * including the uuid field.
+	 */
+	memcpy(&record.hdr.length, &nd->rec->comp_event_log,
+		CPER_CXL_REC_LEN(nd->rec));
+
+	/* Fabricate a log type */
+	hdr_flags = get_unaligned_le24(record.hdr.flags);
+	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
+
+	cxl_event_trace_record(mds->cxlds.cxlmd, log_type, &record,
+			       nd->cper_event);
+
+	return NOTIFY_OK;
+}
+
+static void cxl_unregister_cper_events(void *_mds)
+{
+	struct cxl_memdev_state *mds = _mds;
+
+	unregister_cxl_cper_notifier(&mds->cxl_cper_nb);
+}
+
+static void register_cper_events(struct cxl_memdev_state *mds)
+{
+	mds->cxl_cper_nb.notifier_call = cxl_cper_event_call;
+
+	if (register_cxl_cper_notifier(&mds->cxl_cper_nb)) {
+		dev_err(mds->cxlds.dev, "CPER registration failed\n");
+		return;
+	}
+
+	devm_add_action_or_reset(mds->cxlds.dev, cxl_unregister_cper_events, mds);
+}
+
 static int cxl_event_config(struct pci_host_bridge *host_bridge,
 			    struct cxl_memdev_state *mds)
 {
@@ -758,8 +823,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
 	 * When BIOS maintains CXL error reporting control, it will process
 	 * event records.  Only one agent can do so.
 	 */
-	if (!host_bridge->native_cxl_error)
+	if (!host_bridge->native_cxl_error) {
+		register_cper_events(mds);
 		return 0;
+	}
 
 	rc = cxl_mem_alloc_event_buf(mds);
 	if (rc)

-- 
2.41.0
Re: [PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER events
Posted by Smita Koralahalli 2 years, 1 month ago
Hi Ira,

On 10/26/2023 11:21 AM, Ira Weiny wrote:
> If the firmware has configured CXL event support to be firmware first
> the OS can process those events through CPER records.  Matching memory
> devices to the CPER records can be done via the serial number which is
> part of the CPER record header.
> 
> Detect firmware first, register a notifier callback for each memdev, and
> trace events when they match a device registered.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from RFC v1:
> [iweiny: adjust to cper_event enum instead of converting guids]
> ---
>   drivers/cxl/core/mbox.c | 45 +++++++++++++++++++++++++-------
>   drivers/cxl/cxlmem.h    |  7 +++++
>   drivers/cxl/pci.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 110 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 4df4f614f490..3f760d1d21de 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -860,26 +860,51 @@ static const uuid_t mem_mod_event_uuid =
>   	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
>   		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
>   
> -static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> -				   enum cxl_event_log_type type,
> -				   struct cxl_event_record_raw *record)
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    struct cxl_event_record_raw *record,
> +			    enum cxl_cper_event cper_event)
>   {
> -	uuid_t *id = &record->hdr.id;
> -
> -	if (uuid_equal(id, &gen_media_event_uuid)) {
> +	switch (cper_event) {
> +	case CXL_CPER_EVENT_GEN_MEDIA: {
>   		struct cxl_event_gen_media *rec =
>   				(struct cxl_event_gen_media *)record;
>   
>   		trace_cxl_general_media(cxlmd, type, rec);
> -	} else if (uuid_equal(id, &dram_event_uuid)) {
> +		break;
> +		}
> +	case CXL_CPER_EVENT_DRAM: {
>   		struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
>   
>   		trace_cxl_dram(cxlmd, type, rec);
> -	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
> +		break;
> +		}
> +	case CXL_CPER_EVENT_MEM_MODULE: {
>   		struct cxl_event_mem_module *rec =
>   				(struct cxl_event_mem_module *)record;
>   
>   		trace_cxl_memory_module(cxlmd, type, rec);
> +		break;
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
> +
> +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +				     enum cxl_event_log_type type,
> +				     struct cxl_event_record_raw *record)
> +{
> +	uuid_t *id = &record->hdr.id;
> +
> +	if (uuid_equal(id, &gen_media_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_GEN_MEDIA);
> +	} else if (uuid_equal(id, &dram_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_DRAM);
> +	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_MEM_MODULE);
>   	} else {
>   		/* For unknown record types print just the header */
>   		trace_cxl_generic_event(cxlmd, type, record);
> @@ -991,8 +1016,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>   			break;
>   
>   		for (i = 0; i < nr_rec; i++)
> -			cxl_event_trace_record(cxlmd, type,
> -					       &payload->records[i]);
> +			__cxl_event_trace_record(cxlmd, type,
> +						 &payload->records[i]);
>   
>   		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
>   			trace_cxl_overflow(cxlmd, type, payload);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 706f8a6d1ef4..89bd85e7f51c 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -6,6 +6,7 @@
>   #include <linux/cdev.h>
>   #include <linux/uuid.h>
>   #include <linux/rcuwait.h>
> +#include <linux/efi.h>
>   #include "cxl.h"
>   
>   /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
> @@ -477,6 +478,8 @@ struct cxl_memdev_state {
>   	struct cxl_security_state security;
>   	struct cxl_fw_state fw;
>   
> +	struct notifier_block cxl_cper_nb;
> +
>   	struct rcuwait mbox_wait;
>   	int (*mbox_send)(struct cxl_memdev_state *mds,
>   			 struct cxl_mbox_cmd *cmd);
> @@ -863,6 +866,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   				  unsigned long *cmds);
>   void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    struct cxl_event_record_raw *record,
> +			    enum cxl_cper_event cper_event);
>   int cxl_set_timestamp(struct cxl_memdev_state *mds);
>   int cxl_poison_state_init(struct cxl_memdev_state *mds);
>   int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 44a21ab7add5..36d6f03e55de 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> +#include <asm-generic/unaligned.h>
>   #include <linux/io-64-nonatomic-lo-hi.h>
>   #include <linux/moduleparam.h>
>   #include <linux/module.h>
> @@ -10,6 +11,7 @@
>   #include <linux/pci.h>
>   #include <linux/aer.h>
>   #include <linux/io.h>
> +#include <linux/efi.h>
>   #include "cxlmem.h"
>   #include "cxlpci.h"
>   #include "cxl.h"
> @@ -748,6 +750,69 @@ static bool cxl_event_int_is_fw(u8 setting)
>   	return mode == CXL_INT_FW;
>   }
>   
> +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> +int cxl_cper_event_call(struct notifier_block *nb, unsigned long action, void *data)
> +{
> +	struct cxl_cper_notifier_data *nd = data;
> +	struct cxl_event_record_raw record = (struct cxl_event_record_raw) {
> +		.hdr.id = UUID_INIT(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
> +	};
> +	enum cxl_event_log_type log_type;
> +	struct cxl_memdev_state *mds;
> +	u32 hdr_flags;
> +
> +	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
> +
> +	/* Need serial number for device identification */
> +	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
> +		return NOTIFY_DONE;

For all the event records that I have tested so far, this has never been 
true. That is, CPER_CXL_DEVICE_SN_VALID is never set, so the records might 
not be logged at all. Should we be a bit more lenient here and validate 
device_id (bdf) instead, and check if cxlds exists?

pci_get_domain_bus_and_slot() and pci_get_drvdata()..

> +
> +	/* FIXME endianess and bytes of serial number need verification */
> +	/* FIXME Should other values be checked? */
> +	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
> +		   sizeof(mds->cxlds.serial)))
> +		return NOTIFY_DONE;
> +
> +	/* ensure record can always handle the full CPER provided data */
> +	BUILD_BUG_ON(sizeof(record) <
> +		(CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
> +
> +	/*
> +	 * UEFI v2.10 defines N.2.14 defines the CXL CPER record as not
> +	 * including the uuid field.
> +	 */
> +	memcpy(&record.hdr.length, &nd->rec->comp_event_log,
> +		CPER_CXL_REC_LEN(nd->rec));

I'm doubtful this will do the job. I think we should copy into each 
field of struct cxl_event_record_hdr individually, starting from length, 
by pointer arithmetic (which is definitely bad, but I cannot think of a 
better way to do this) and then do a memcpy for the data field in struct 
cxl_event_record_raw.

Any other suggestions would be helpful as well.

I can make these changes and validate it on my end if that works..?

Thanks,
Smita

> +
> +	/* Fabricate a log type */
> +	hdr_flags = get_unaligned_le24(record.hdr.flags);
> +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> +
> +	cxl_event_trace_record(mds->cxlds.cxlmd, log_type, &record,
> +			       nd->cper_event);
> +
> +	return NOTIFY_OK;
> +}
> +
> +static void cxl_unregister_cper_events(void *_mds)
> +{
> +	struct cxl_memdev_state *mds = _mds;
> +
> +	unregister_cxl_cper_notifier(&mds->cxl_cper_nb);
> +}
> +
> +static void register_cper_events(struct cxl_memdev_state *mds)
> +{
> +	mds->cxl_cper_nb.notifier_call = cxl_cper_event_call;
> +
> +	if (register_cxl_cper_notifier(&mds->cxl_cper_nb)) {
> +		dev_err(mds->cxlds.dev, "CPER registration failed\n");
> +		return;
> +	}
> +
> +	devm_add_action_or_reset(mds->cxlds.dev, cxl_unregister_cper_events, mds);
> +}
> +
>   static int cxl_event_config(struct pci_host_bridge *host_bridge,
>   			    struct cxl_memdev_state *mds)
>   {
> @@ -758,8 +823,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
>   	 * When BIOS maintains CXL error reporting control, it will process
>   	 * event records.  Only one agent can do so.
>   	 */
> -	if (!host_bridge->native_cxl_error)
> +	if (!host_bridge->native_cxl_error) {
> +		register_cper_events(mds);
>   		return 0;
> +	}
>   
>   	rc = cxl_mem_alloc_event_buf(mds);
>   	if (rc)
>
Re: [PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER events
Posted by Dan Williams 2 years, 1 month ago
Smita Koralahalli wrote:
[..]
> > +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> > +int cxl_cper_event_call(struct notifier_block *nb, unsigned long action, void *data)
> > +{
> > +	struct cxl_cper_notifier_data *nd = data;
> > +	struct cxl_event_record_raw record = (struct cxl_event_record_raw) {
> > +		.hdr.id = UUID_INIT(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
> > +	};
> > +	enum cxl_event_log_type log_type;
> > +	struct cxl_memdev_state *mds;
> > +	u32 hdr_flags;
> > +
> > +	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
> > +
> > +	/* Need serial number for device identification */
> > +	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
> > +		return NOTIFY_DONE;
> 
> For all the event records that I tested so far, this has never been 
> true. That is CPER_CXL_DEVICE_SN_VALID is never set which might not log 
> the records at all. Should we be bit more lenient here and include 
> validating device_id (bdf) instead and check if cxlds exist?

Agree. While I do think those devices are out of spec, given CXL mandates
a valid serial number, I think the robustness principle applies and Linux
should rely on bdf information. I also expect that with MH-SLDs and
potentially other scenarios, a serial number may be duplicated, so bdf is
more reliable in that dimension as well.

> > +	/* FIXME endianess and bytes of serial number need verification */
> > +	/* FIXME Should other values be checked? */
> > +	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
> > +		   sizeof(mds->cxlds.serial)))
> > +		return NOTIFY_DONE;
> > +
> > +	/* ensure record can always handle the full CPER provided data */
> > +	BUILD_BUG_ON(sizeof(record) <
> > +		(CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
> > +
> > +	/*
> > +	 * UEFI v2.10 defines N.2.14 defines the CXL CPER record as not
> > +	 * including the uuid field.
> > +	 */
> > +	memcpy(&record.hdr.length, &nd->rec->comp_event_log,
> > +		CPER_CXL_REC_LEN(nd->rec));
> 
> I'm doubtful this will do the job. I think we should copy into each 
> field of struct cxl_event_record_hdr individually starting from length 
> by pointer arithmetic (which is definitely bad, but I cannot think of a 
> better way to do this) and then do memcpy for data field in struct 
> cxl_event_record_raw..
> 
> Any other suggestions would be helpful as well.
> 
> I can make these changes and validate it on my end if that works..?

It sounds like you have a more readily available real world test
environment for this, so that sounds good to me.
RE: [PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER events
Posted by Dan Williams 2 years, 1 month ago
Ira Weiny wrote:
> If the firmware has configured CXL event support to be firmware first
> the OS can process those events through CPER records.  Matching memory
> devices to the CPER records can be done via the serial number which is
> part of the CPER record header.
> 
> Detect firmware first, register a notifier callback for each memdev, and
> trace events when they match a device registered.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
[..]

The changes requested in patch2 cover all of the comments I currently
have on this patch, just one more cleanup below:

> +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> +int cxl_cper_event_call(struct notifier_block *nb, unsigned long action, void *data)
> +{
> +	struct cxl_cper_notifier_data *nd = data;
> +	struct cxl_event_record_raw record = (struct cxl_event_record_raw) {
> +		.hdr.id = UUID_INIT(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
> +	};

Just do:

	struct cxl_event_record_raw record = { 0 };

...and the compiler will take care of the rest as initializing any field
automatically initializes everything else to zero.