[PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging

Terry Bowman posted 16 patches 6 months, 2 weeks ago
[PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Terry Bowman 6 months, 2 weeks ago
The AER service driver and aer_event tracing currently log 'PCIe Bus Error'
for all errors. Update the driver and aer_event tracing to log 'CXL Bus
Error' for CXL device errors.

This requires that the AER driver can identify and distinguish between PCIe
errors and CXL errors.

Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
aer_get_device_error_info() and pci_print_aer().

Update the aer_event trace routine to accept a bus type string parameter.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/pci/pci.h       |  6 ++++++
 drivers/pci/pcie/aer.c  | 18 ++++++++++++------
 include/ras/ras_event.h |  9 ++++++---
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b81e99cd4b62..d6296500b004 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
 struct aer_err_info {
 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
 	int error_dev_num;
+	bool is_cxl;
 
 	unsigned int id:16;
 
@@ -604,6 +605,11 @@ struct aer_err_info {
 	struct pcie_tlp_log tlp;	/* TLP Header */
 };
 
+static inline const char *aer_err_bus(struct aer_err_info *info)
+{
+	return info->is_cxl ? "CXL" : "PCIe";
+}
+
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a1cf8c7ef628..adb4b1123b9b 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
 
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 {
+	const char *bus_type = aer_err_bus(info);
 	int layer, agent;
 	int id = pci_dev_id(dev);
 	const char *level;
 
 	if (!info->status) {
-		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
-			aer_error_severity_string[info->severity]);
+		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
+			bus_type, aer_error_severity_string[info->severity]);
 		goto out;
 	}
 
@@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 
 	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
 
-	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
-		   aer_error_severity_string[info->severity],
+	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
+		   bus_type, aer_error_severity_string[info->severity],
 		   aer_error_layer[layer], aer_agent_string[agent]);
 
 	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
@@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	if (info->id && info->error_dev_num > 1 && info->id == id)
 		pci_err(dev, "  Error of this Agent is reported first\n");
 
-	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
 			info->severity, info->tlp_header_valid, &info->tlp);
 }
 
@@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 void pci_print_aer(struct pci_dev *dev, int aer_severity,
 		   struct aer_capability_regs *aer)
 {
+	const char *bus_type;
 	int layer, agent, tlp_header_valid = 0;
 	u32 status, mask;
 	struct aer_err_info info;
@@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
 	info.status = status;
 	info.mask = mask;
 	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
+	info.is_cxl = pcie_is_cxl(dev);
+
+	bus_type = aer_err_bus(&info);
 
 	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
 	__aer_print_error(dev, &info);
@@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
 	if (tlp_header_valid)
 		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
 
-	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
 			aer_severity, tlp_header_valid, &aer->header_log);
 }
 EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
@@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
 	/* Must reset in this function */
 	info->status = 0;
 	info->tlp_header_valid = 0;
+	info->is_cxl = pcie_is_cxl(dev);
 
 	/* The device might not support AER */
 	if (!aer)
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 14c9f943d53f..080829d59c36 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
 
 TRACE_EVENT(aer_event,
 	TP_PROTO(const char *dev_name,
+		 const char *bus_type,
 		 const u32 status,
 		 const u8 severity,
 		 const u8 tlp_header_valid,
 		 struct pcie_tlp_log *tlp),
 
-	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
+	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
 
 	TP_STRUCT__entry(
 		__string(	dev_name,	dev_name	)
+		__string(	bus_type,	bus_type	)
 		__field(	u32,		status		)
 		__field(	u8,		severity	)
 		__field(	u8, 		tlp_header_valid)
@@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
 
 	TP_fast_assign(
 		__assign_str(dev_name);
+		__assign_str(bus_type);
 		__entry->status		= status;
 		__entry->severity	= severity;
 		__entry->tlp_header_valid = tlp_header_valid;
@@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
 		}
 	),
 
-	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
-		__get_str(dev_name),
+	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
+		__get_str(dev_name), __get_str(bus_type),
 		__entry->severity == AER_CORRECTABLE ? "Corrected" :
 			__entry->severity == AER_FATAL ?
 			"Fatal" : "Uncorrected, non-fatal",
-- 
2.34.1
Re: [PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Dave Jiang 6 months, 2 weeks ago

On 6/3/25 10:22 AM, Terry Bowman wrote:
> The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
> for all errors. Update the driver and aer_event tracing to log 'CXL Bus
> Type' for CXL device errors.
> 
> This requires the AER can identify and distinguish between PCIe errors and
> CXL errors.
> 
> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
> aer_get_device_error_info() and pci_print_aer().
> 
> Update the aer_event trace routine to accept a bus type string parameter.
> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/pci/pci.h       |  6 ++++++
>  drivers/pci/pcie/aer.c  | 18 ++++++++++++------
>  include/ras/ras_event.h |  9 ++++++---
>  3 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index b81e99cd4b62..d6296500b004 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>  struct aer_err_info {
>  	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>  	int error_dev_num;
> +	bool is_cxl;
>  
>  	unsigned int id:16;
>  
> @@ -604,6 +605,11 @@ struct aer_err_info {
>  	struct pcie_tlp_log tlp;	/* TLP Header */
>  };
>  
> +static inline const char *aer_err_bus(struct aer_err_info *info)
> +{
> +	return info->is_cxl ? "CXL" : "PCIe";
> +}
> +
>  int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>  
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index a1cf8c7ef628..adb4b1123b9b 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
>  
>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  {
> +	const char *bus_type = aer_err_bus(info);
>  	int layer, agent;
>  	int id = pci_dev_id(dev);
>  	const char *level;
>  
>  	if (!info->status) {
> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> -			aer_error_severity_string[info->severity]);
> +		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> +			bus_type, aer_error_severity_string[info->severity]);
>  		goto out;
>  	}
>  
> @@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  
>  	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
>  
> -	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> -		   aer_error_severity_string[info->severity],
> +	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
> +		   bus_type, aer_error_severity_string[info->severity],
>  		   aer_error_layer[layer], aer_agent_string[agent]);
>  
>  	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
> @@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  	if (info->id && info->error_dev_num > 1 && info->id == id)
>  		pci_err(dev, "  Error of this Agent is reported first\n");
>  
> -	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
> +	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
>  			info->severity, info->tlp_header_valid, &info->tlp);
>  }
>  
> @@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
>  void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  		   struct aer_capability_regs *aer)
>  {
> +	const char *bus_type;
>  	int layer, agent, tlp_header_valid = 0;
>  	u32 status, mask;
>  	struct aer_err_info info;
> @@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  	info.status = status;
>  	info.mask = mask;
>  	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
> +	info.is_cxl = pcie_is_cxl(dev);
> +
> +	bus_type = aer_err_bus(&info);
>  
>  	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>  	__aer_print_error(dev, &info);
> @@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  	if (tlp_header_valid)
>  		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
>  
> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
> +	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>  			aer_severity, tlp_header_valid, &aer->header_log);
>  }
>  EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
> @@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>  	/* Must reset in this function */
>  	info->status = 0;
>  	info->tlp_header_valid = 0;
> +	info->is_cxl = pcie_is_cxl(dev);
>  
>  	/* The device might not support AER */
>  	if (!aer)
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 14c9f943d53f..080829d59c36 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>  
>  TRACE_EVENT(aer_event,
>  	TP_PROTO(const char *dev_name,
> +		 const char *bus_type,
>  		 const u32 status,
>  		 const u8 severity,
>  		 const u8 tlp_header_valid,
>  		 struct pcie_tlp_log *tlp),
>  
> -	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
> +	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>  
>  	TP_STRUCT__entry(
>  		__string(	dev_name,	dev_name	)
> +		__string(	bus_type,	bus_type	)
>  		__field(	u32,		status		)
>  		__field(	u8,		severity	)
>  		__field(	u8, 		tlp_header_valid)
> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>  
>  	TP_fast_assign(
>  		__assign_str(dev_name);
> +		__assign_str(bus_type);
>  		__entry->status		= status;
>  		__entry->severity	= severity;
>  		__entry->tlp_header_valid = tlp_header_valid;
> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
>  		}
>  	),
>  
> -	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
> -		__get_str(dev_name),
> +	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
> +		__get_str(dev_name), __get_str(bus_type),
>  		__entry->severity == AER_CORRECTABLE ? "Corrected" :
>  			__entry->severity == AER_FATAL ?
>  			"Fatal" : "Uncorrected, non-fatal",
Re: [PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Sathyanarayanan Kuppuswamy 6 months, 2 weeks ago
On 6/3/25 10:22 AM, Terry Bowman wrote:
> The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
> for all errors. Update the driver and aer_event tracing to log 'CXL Bus
> Type' for CXL device errors.
>
> This requires the AER can identify and distinguish between PCIe errors and
> CXL errors.
>
> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
> aer_get_device_error_info() and pci_print_aer().
>
> Update the aer_event trace routine to accept a bus type string parameter.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> ---
>   drivers/pci/pci.h       |  6 ++++++
>   drivers/pci/pcie/aer.c  | 18 ++++++++++++------
>   include/ras/ras_event.h |  9 ++++++---
>   3 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index b81e99cd4b62..d6296500b004 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>   struct aer_err_info {
>   	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>   	int error_dev_num;
> +	bool is_cxl;

Do you really need this member? Why not just use pcie_is_cxl() in aer_err_bus()?

>   
>   	unsigned int id:16;
>   
> @@ -604,6 +605,11 @@ struct aer_err_info {
>   	struct pcie_tlp_log tlp;	/* TLP Header */
>   };
>   
> +static inline const char *aer_err_bus(struct aer_err_info *info)
> +{
> +	return info->is_cxl ? "CXL" : "PCIe";
> +}
> +
>   int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>   void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>   
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index a1cf8c7ef628..adb4b1123b9b 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
>   
>   void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   {
> +	const char *bus_type = aer_err_bus(info);
>   	int layer, agent;
>   	int id = pci_dev_id(dev);
>   	const char *level;
>   
>   	if (!info->status) {
> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> -			aer_error_severity_string[info->severity]);
> +		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> +			bus_type, aer_error_severity_string[info->severity]);
>   		goto out;
>   	}
>   
> @@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   
>   	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
>   
> -	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
> -		   aer_error_severity_string[info->severity],
> +	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
> +		   bus_type, aer_error_severity_string[info->severity],
>   		   aer_error_layer[layer], aer_agent_string[agent]);
>   
>   	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
> @@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   	if (info->id && info->error_dev_num > 1 && info->id == id)
>   		pci_err(dev, "  Error of this Agent is reported first\n");
>   
> -	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
> +	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
>   			info->severity, info->tlp_header_valid, &info->tlp);
>   }
>   
> @@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
>   void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   		   struct aer_capability_regs *aer)
>   {
> +	const char *bus_type;
>   	int layer, agent, tlp_header_valid = 0;
>   	u32 status, mask;
>   	struct aer_err_info info;
> @@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   	info.status = status;
>   	info.mask = mask;
>   	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
> +	info.is_cxl = pcie_is_cxl(dev);
> +
> +	bus_type = aer_err_bus(&info);
>   
>   	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>   	__aer_print_error(dev, &info);
> @@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   	if (tlp_header_valid)
>   		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
>   
> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
> +	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>   			aer_severity, tlp_header_valid, &aer->header_log);
>   }
>   EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
> @@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>   	/* Must reset in this function */
>   	info->status = 0;
>   	info->tlp_header_valid = 0;
> +	info->is_cxl = pcie_is_cxl(dev);
>   
>   	/* The device might not support AER */
>   	if (!aer)
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 14c9f943d53f..080829d59c36 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>   
>   TRACE_EVENT(aer_event,
>   	TP_PROTO(const char *dev_name,
> +		 const char *bus_type,
>   		 const u32 status,
>   		 const u8 severity,
>   		 const u8 tlp_header_valid,
>   		 struct pcie_tlp_log *tlp),
>   
> -	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
> +	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>   
>   	TP_STRUCT__entry(
>   		__string(	dev_name,	dev_name	)
> +		__string(	bus_type,	bus_type	)
>   		__field(	u32,		status		)
>   		__field(	u8,		severity	)
>   		__field(	u8, 		tlp_header_valid)
> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>   
>   	TP_fast_assign(
>   		__assign_str(dev_name);
> +		__assign_str(bus_type);
>   		__entry->status		= status;
>   		__entry->severity	= severity;
>   		__entry->tlp_header_valid = tlp_header_valid;
> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
>   		}
>   	),
>   
> -	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
> -		__get_str(dev_name),
> +	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
> +		__get_str(dev_name), __get_str(bus_type),
>   		__entry->severity == AER_CORRECTABLE ? "Corrected" :
>   			__entry->severity == AER_FATAL ?
>   			"Fatal" : "Uncorrected, non-fatal",

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
Re: [PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Bowman, Terry 6 months, 2 weeks ago

On 6/3/2025 5:02 PM, Sathyanarayanan Kuppuswamy wrote:
> On 6/3/25 10:22 AM, Terry Bowman wrote:
>> The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
>> for all errors. Update the driver and aer_event tracing to log 'CXL Bus
>> Type' for CXL device errors.
>>
>> This requires the AER can identify and distinguish between PCIe errors and
>> CXL errors.
>>
>> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
>> aer_get_device_error_info() and pci_print_aer().
>>
>> Update the aer_event trace routine to accept a bus type string parameter.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>> ---
>>   drivers/pci/pci.h       |  6 ++++++
>>   drivers/pci/pcie/aer.c  | 18 ++++++++++++------
>>   include/ras/ras_event.h |  9 ++++++---
>>   3 files changed, 24 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index b81e99cd4b62..d6296500b004 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>>   struct aer_err_info {
>>   	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>>   	int error_dev_num;
>> +	bool is_cxl;
> Do you really need this member ? Why not just use pcie_is_cxl() in aer_err_bus()?

This was added per Dan's request instead of using pcie_is_cxl().[1]

[1] https://lore.kernel.org/linux-cxl/67abe1903a8ed_2d1e2942f@dwillia2-xfh.jf.intel.com.notmuch/

-Terry

>>   
>>   	unsigned int id:16;
>>   
>> @@ -604,6 +605,11 @@ struct aer_err_info {
>>   	struct pcie_tlp_log tlp;	/* TLP Header */
>>   };
>>   
>> +static inline const char *aer_err_bus(struct aer_err_info *info)
>> +{
>> +	return info->is_cxl ? "CXL" : "PCIe";
>> +}
>> +
>>   int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>>   void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>>   
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index a1cf8c7ef628..adb4b1123b9b 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
>>   
>>   void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>   {
>> +	const char *bus_type = aer_err_bus(info);
>>   	int layer, agent;
>>   	int id = pci_dev_id(dev);
>>   	const char *level;
>>   
>>   	if (!info->status) {
>> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>> -			aer_error_severity_string[info->severity]);
>> +		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>> +			bus_type, aer_error_severity_string[info->severity]);
>>   		goto out;
>>   	}
>>   
>> @@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>   
>>   	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
>>   
>> -	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>> -		   aer_error_severity_string[info->severity],
>> +	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
>> +		   bus_type, aer_error_severity_string[info->severity],
>>   		   aer_error_layer[layer], aer_agent_string[agent]);
>>   
>>   	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
>> @@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>   	if (info->id && info->error_dev_num > 1 && info->id == id)
>>   		pci_err(dev, "  Error of this Agent is reported first\n");
>>   
>> -	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
>>   			info->severity, info->tlp_header_valid, &info->tlp);
>>   }
>>   
>> @@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
>>   void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>   		   struct aer_capability_regs *aer)
>>   {
>> +	const char *bus_type;
>>   	int layer, agent, tlp_header_valid = 0;
>>   	u32 status, mask;
>>   	struct aer_err_info info;
>> @@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>   	info.status = status;
>>   	info.mask = mask;
>>   	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
>> +	info.is_cxl = pcie_is_cxl(dev);
>> +
>> +	bus_type = aer_err_bus(&info);
>>   
>>   	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>>   	__aer_print_error(dev, &info);
>> @@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>   	if (tlp_header_valid)
>>   		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
>>   
>> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>>   			aer_severity, tlp_header_valid, &aer->header_log);
>>   }
>>   EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>> @@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>>   	/* Must reset in this function */
>>   	info->status = 0;
>>   	info->tlp_header_valid = 0;
>> +	info->is_cxl = pcie_is_cxl(dev);
>>   
>>   	/* The device might not support AER */
>>   	if (!aer)
>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>> index 14c9f943d53f..080829d59c36 100644
>> --- a/include/ras/ras_event.h
>> +++ b/include/ras/ras_event.h
>> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>>   
>>   TRACE_EVENT(aer_event,
>>   	TP_PROTO(const char *dev_name,
>> +		 const char *bus_type,
>>   		 const u32 status,
>>   		 const u8 severity,
>>   		 const u8 tlp_header_valid,
>>   		 struct pcie_tlp_log *tlp),
>>   
>> -	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
>> +	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>>   
>>   	TP_STRUCT__entry(
>>   		__string(	dev_name,	dev_name	)
>> +		__string(	bus_type,	bus_type	)
>>   		__field(	u32,		status		)
>>   		__field(	u8,		severity	)
>>   		__field(	u8, 		tlp_header_valid)
>> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>>   
>>   	TP_fast_assign(
>>   		__assign_str(dev_name);
>> +		__assign_str(bus_type);
>>   		__entry->status		= status;
>>   		__entry->severity	= severity;
>>   		__entry->tlp_header_valid = tlp_header_valid;
>> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
>>   		}
>>   	),
>>   
>> -	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
>> -		__get_str(dev_name),
>> +	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
>> +		__get_str(dev_name), __get_str(bus_type),
>>   		__entry->severity == AER_CORRECTABLE ? "Corrected" :
>>   			__entry->severity == AER_FATAL ?
>>   			"Fatal" : "Uncorrected, non-fatal",
Re: [PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Sathyanarayanan Kuppuswamy 6 months, 2 weeks ago
On 6/4/25 7:32 AM, Bowman, Terry wrote:
>
> On 6/3/2025 5:02 PM, Sathyanarayanan Kuppuswamy wrote:
>> On 6/3/25 10:22 AM, Terry Bowman wrote:
>>> The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
>>> for all errors. Update the driver and aer_event tracing to log 'CXL Bus
>>> Type' for CXL device errors.
>>>
>>> This requires the AER can identify and distinguish between PCIe errors and
>>> CXL errors.
>>>
>>> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
>>> aer_get_device_error_info() and pci_print_aer().
>>>
>>> Update the aer_event trace routine to accept a bus type string parameter.
>>>
>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>>> ---
>>>    drivers/pci/pci.h       |  6 ++++++
>>>    drivers/pci/pcie/aer.c  | 18 ++++++++++++------
>>>    include/ras/ras_event.h |  9 ++++++---
>>>    3 files changed, 24 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>>> index b81e99cd4b62..d6296500b004 100644
>>> --- a/drivers/pci/pci.h
>>> +++ b/drivers/pci/pci.h
>>> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>>>    struct aer_err_info {
>>>    	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>>>    	int error_dev_num;
>>> +	bool is_cxl;
>> Do you really need this member ? Why not just use pcie_is_cxl() in aer_err_bus()?
> This was added per Dan's request instead of using pcie_is_cxl().[1]
>
> [1] https://lore.kernel.org/linux-cxl/67abe1903a8ed_2d1e2942f@dwillia2-xfh.jf.intel.com.notmuch/
>
> -Terry

It looks like it was added to accommodate some future use cases. Maybe add some info about it in the aer_err_info struct. Just looking at the code, that member's value mirrors pci_dev->is_cxl, and wherever you read info->is_cxl, you can also read the value from pci_dev->is_cxl.
>>>    
>>>    	unsigned int id:16;
>>>    
>>> @@ -604,6 +605,11 @@ struct aer_err_info {
>>>    	struct pcie_tlp_log tlp;	/* TLP Header */
>>>    };
>>>    
>>> +static inline const char *aer_err_bus(struct aer_err_info *info)
>>> +{
>>> +	return info->is_cxl ? "CXL" : "PCIe";
>>> +}
>>> +
>>>    int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>>>    void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>>>    
>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>> index a1cf8c7ef628..adb4b1123b9b 100644
>>> --- a/drivers/pci/pcie/aer.c
>>> +++ b/drivers/pci/pcie/aer.c
>>> @@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
>>>    
>>>    void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>    {
>>> +	const char *bus_type = aer_err_bus(info);
>>>    	int layer, agent;
>>>    	int id = pci_dev_id(dev);
>>>    	const char *level;
>>>    
>>>    	if (!info->status) {
>>> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>>> -			aer_error_severity_string[info->severity]);
>>> +		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>>> +			bus_type, aer_error_severity_string[info->severity]);
>>>    		goto out;
>>>    	}
>>>    
>>> @@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>    
>>>    	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
>>>    
>>> -	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>>> -		   aer_error_severity_string[info->severity],
>>> +	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
>>> +		   bus_type, aer_error_severity_string[info->severity],
>>>    		   aer_error_layer[layer], aer_agent_string[agent]);
>>>    
>>>    	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
>>> @@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>    	if (info->id && info->error_dev_num > 1 && info->id == id)
>>>    		pci_err(dev, "  Error of this Agent is reported first\n");
>>>    
>>> -	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
>>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
>>>    			info->severity, info->tlp_header_valid, &info->tlp);
>>>    }
>>>    
>>> @@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
>>>    void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>    		   struct aer_capability_regs *aer)
>>>    {
>>> +	const char *bus_type;
>>>    	int layer, agent, tlp_header_valid = 0;
>>>    	u32 status, mask;
>>>    	struct aer_err_info info;
>>> @@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>    	info.status = status;
>>>    	info.mask = mask;
>>>    	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
>>> +	info.is_cxl = pcie_is_cxl(dev);
>>> +
>>> +	bus_type = aer_err_bus(&info);
>>>    
>>>    	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>>>    	__aer_print_error(dev, &info);
>>> @@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>    	if (tlp_header_valid)
>>>    		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
>>>    
>>> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
>>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>>>    			aer_severity, tlp_header_valid, &aer->header_log);
>>>    }
>>>    EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>>> @@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>>>    	/* Must reset in this function */
>>>    	info->status = 0;
>>>    	info->tlp_header_valid = 0;
>>> +	info->is_cxl = pcie_is_cxl(dev);
>>>    
>>>    	/* The device might not support AER */
>>>    	if (!aer)
>>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>>> index 14c9f943d53f..080829d59c36 100644
>>> --- a/include/ras/ras_event.h
>>> +++ b/include/ras/ras_event.h
>>> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>>>    
>>>    TRACE_EVENT(aer_event,
>>>    	TP_PROTO(const char *dev_name,
>>> +		 const char *bus_type,
>>>    		 const u32 status,
>>>    		 const u8 severity,
>>>    		 const u8 tlp_header_valid,
>>>    		 struct pcie_tlp_log *tlp),
>>>    
>>> -	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
>>> +	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>>>    
>>>    	TP_STRUCT__entry(
>>>    		__string(	dev_name,	dev_name	)
>>> +		__string(	bus_type,	bus_type	)
>>>    		__field(	u32,		status		)
>>>    		__field(	u8,		severity	)
>>>    		__field(	u8, 		tlp_header_valid)
>>> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>>>    
>>>    	TP_fast_assign(
>>>    		__assign_str(dev_name);
>>> +		__assign_str(bus_type);
>>>    		__entry->status		= status;
>>>    		__entry->severity	= severity;
>>>    		__entry->tlp_header_valid = tlp_header_valid;
>>> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
>>>    		}
>>>    	),
>>>    
>>> -	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
>>> -		__get_str(dev_name),
>>> +	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
>>> +		__get_str(dev_name), __get_str(bus_type),
>>>    		__entry->severity == AER_CORRECTABLE ? "Corrected" :
>>>    			__entry->severity == AER_FATAL ?
>>>    			"Fatal" : "Uncorrected, non-fatal",
>
-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer
Re: [PATCH v9 02/16] PCI/AER: Report CXL or PCIe bus error type in trace logging
Posted by Bowman, Terry 6 months, 2 weeks ago
On 6/4/2025 2:24 PM, Sathyanarayanan Kuppuswamy wrote:
> 
> On 6/4/25 7:32 AM, Bowman, Terry wrote:
>>
>> On 6/3/2025 5:02 PM, Sathyanarayanan Kuppuswamy wrote:
>>> On 6/3/25 10:22 AM, Terry Bowman wrote:
>>>> The AER service driver and aer_event tracing currently log 'PCIe Bus Error'
>>>> for all errors. Update the driver and aer_event tracing to log 'CXL Bus
>>>> Error' for CXL device errors.
>>>>
>>>> This requires that the AER driver can distinguish between PCIe errors and
>>>> CXL errors.
>>>>
>>>> Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
>>>> aer_get_device_error_info() and pci_print_aer().
>>>>
>>>> Update the aer_event trace routine to accept a bus type string parameter.
>>>>
>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>>>> ---
>>>>    drivers/pci/pci.h       |  6 ++++++
>>>>    drivers/pci/pcie/aer.c  | 18 ++++++++++++------
>>>>    include/ras/ras_event.h |  9 ++++++---
>>>>    3 files changed, 24 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>>>> index b81e99cd4b62..d6296500b004 100644
>>>> --- a/drivers/pci/pci.h
>>>> +++ b/drivers/pci/pci.h
>>>> @@ -588,6 +588,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>>>>    struct aer_err_info {
>>>>    	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
>>>>    	int error_dev_num;
>>>> +	bool is_cxl;
>>> Do you really need this member ? Why not just use pcie_is_cxl() in aer_err_bus()?
>> This was added per Dan's request instead of using pcie_is_cxl().[1]
>>
>> [1] https://lore.kernel.org/linux-cxl/67abe1903a8ed_2d1e2942f@dwillia2-xfh.jf.intel.com.notmuch/
>>
>> -Terry
> 
> It looks like it was added to accommodate some future use cases. Maybe add some info about it in the aer_err_info struct. Just looking at the code, that member's value mirrors pci_dev->is_cxl, and wherever you read info->is_cxl, you can also read the value from pci_dev->is_cxl.

Right. pci_dev::is_cxl is currently only updated at device creation but could be tied 
to alternate protocol training and link status changes.

Terry

>>>>    
>>>>    	unsigned int id:16;
>>>>    
>>>> @@ -604,6 +605,11 @@ struct aer_err_info {
>>>>    	struct pcie_tlp_log tlp;	/* TLP Header */
>>>>    };
>>>>    
>>>> +static inline const char *aer_err_bus(struct aer_err_info *info)
>>>> +{
>>>> +	return info->is_cxl ? "CXL" : "PCIe";
>>>> +}
>>>> +
>>>>    int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
>>>>    void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
>>>>    
>>>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>>>> index a1cf8c7ef628..adb4b1123b9b 100644
>>>> --- a/drivers/pci/pcie/aer.c
>>>> +++ b/drivers/pci/pcie/aer.c
>>>> @@ -698,13 +698,14 @@ static void __aer_print_error(struct pci_dev *dev,
>>>>    
>>>>    void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>>    {
>>>> +	const char *bus_type = aer_err_bus(info);
>>>>    	int layer, agent;
>>>>    	int id = pci_dev_id(dev);
>>>>    	const char *level;
>>>>    
>>>>    	if (!info->status) {
>>>> -		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>>>> -			aer_error_severity_string[info->severity]);
>>>> +		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
>>>> +			bus_type, aer_error_severity_string[info->severity]);
>>>>    		goto out;
>>>>    	}
>>>>    
>>>> @@ -713,8 +714,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>>    
>>>>    	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
>>>>    
>>>> -	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>>>> -		   aer_error_severity_string[info->severity],
>>>> +	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
>>>> +		   bus_type, aer_error_severity_string[info->severity],
>>>>    		   aer_error_layer[layer], aer_agent_string[agent]);
>>>>    
>>>>    	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
>>>> @@ -729,7 +730,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>>>>    	if (info->id && info->error_dev_num > 1 && info->id == id)
>>>>    		pci_err(dev, "  Error of this Agent is reported first\n");
>>>>    
>>>> -	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
>>>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (info->status & ~info->mask),
>>>>    			info->severity, info->tlp_header_valid, &info->tlp);
>>>>    }
>>>>    
>>>> @@ -763,6 +764,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
>>>>    void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>>    		   struct aer_capability_regs *aer)
>>>>    {
>>>> +	const char *bus_type;
>>>>    	int layer, agent, tlp_header_valid = 0;
>>>>    	u32 status, mask;
>>>>    	struct aer_err_info info;
>>>> @@ -784,6 +786,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>>    	info.status = status;
>>>>    	info.mask = mask;
>>>>    	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
>>>> +	info.is_cxl = pcie_is_cxl(dev);
>>>> +
>>>> +	bus_type = aer_err_bus(&info);
>>>>    
>>>>    	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>>>>    	__aer_print_error(dev, &info);
>>>> @@ -797,7 +802,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>>>>    	if (tlp_header_valid)
>>>>    		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
>>>>    
>>>> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
>>>> +	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>>>>    			aer_severity, tlp_header_valid, &aer->header_log);
>>>>    }
>>>>    EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>>>> @@ -1215,6 +1220,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>>>>    	/* Must reset in this function */
>>>>    	info->status = 0;
>>>>    	info->tlp_header_valid = 0;
>>>> +	info->is_cxl = pcie_is_cxl(dev);
>>>>    
>>>>    	/* The device might not support AER */
>>>>    	if (!aer)
>>>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>>>> index 14c9f943d53f..080829d59c36 100644
>>>> --- a/include/ras/ras_event.h
>>>> +++ b/include/ras/ras_event.h
>>>> @@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>>>>    
>>>>    TRACE_EVENT(aer_event,
>>>>    	TP_PROTO(const char *dev_name,
>>>> +		 const char *bus_type,
>>>>    		 const u32 status,
>>>>    		 const u8 severity,
>>>>    		 const u8 tlp_header_valid,
>>>>    		 struct pcie_tlp_log *tlp),
>>>>    
>>>> -	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
>>>> +	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>>>>    
>>>>    	TP_STRUCT__entry(
>>>>    		__string(	dev_name,	dev_name	)
>>>> +		__string(	bus_type,	bus_type	)
>>>>    		__field(	u32,		status		)
>>>>    		__field(	u8,		severity	)
>>>>    		__field(	u8, 		tlp_header_valid)
>>>> @@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>>>>    
>>>>    	TP_fast_assign(
>>>>    		__assign_str(dev_name);
>>>> +		__assign_str(bus_type);
>>>>    		__entry->status		= status;
>>>>    		__entry->severity	= severity;
>>>>    		__entry->tlp_header_valid = tlp_header_valid;
>>>> @@ -325,8 +328,8 @@ TRACE_EVENT(aer_event,
>>>>    		}
>>>>    	),
>>>>    
>>>> -	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
>>>> -		__get_str(dev_name),
>>>> +	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
>>>> +		__get_str(dev_name), __get_str(bus_type),
>>>>    		__entry->severity == AER_CORRECTABLE ? "Corrected" :
>>>>    			__entry->severity == AER_FATAL ?
>>>>    			"Fatal" : "Uncorrected, non-fatal",
>>