[v12] Enable CXL PCIe Port Protocol Error handling and logging

[PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Terry Bowman 4 months, 2 weeks ago

Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
handling. Follow similar design as found in PCIe error driver,
pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
as fatal with a kernel panic. This is to prevent corruption on CXL memory.

Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
CXL ports instead. This will iterate through the CXL topology from the
erroring device through the downstream CXL Ports and Endpoints.

Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>

---

Changes in v11->v12:
- Cleaned up port discovery in cxl_do_recovery() (Dave)
- Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()

Changes in v10->v11:
- pci_ers_merge_results() - Move to earlier patch
---
 drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 7e8d63c32d72..45f92defca64 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
 
+static int cxl_report_error_detected(struct device *dev, void *data)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	pci_ers_result_t vote, *result = data;
+
+	guard(device)(dev);
+
+	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
+	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
+		if (!cxl_pci_drv_bound(pdev))
+			return 0;
+
+		vote = cxl_error_detected(dev);
+	} else {
+		vote = cxl_port_error_detected(dev);
+	}
+
+	*result = pci_ers_merge_result(*result, vote);
+
+	return 0;
+}
+
+static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
+{
+	struct cxl_port *port;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+
+	return port->parent_dport->dport_dev == dport_dev;
+}
+
+static void cxl_walk_port(struct device *port_dev,
+			  int (*cb)(struct device *, void *),
+			  void *userdata)
+{
+	struct cxl_dport *dport = NULL;
+	struct cxl_port *port;
+	unsigned long index;
+
+	if (!port_dev)
+		return;
+
+	port = to_cxl_port(port_dev);
+	if (port->uport_dev && dev_is_pci(port->uport_dev))
+		cb(port->uport_dev, userdata);
+
+	xa_for_each(&port->dports, index, dport)
+	{
+		struct device *child_port_dev __free(put_device) =
+			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
+					match_port_by_parent_dport);
+
+		cb(dport->dport_dev, userdata);
+
+		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
+	}
+
+	if (is_cxl_endpoint(port))
+		cb(port->uport_dev->parent, userdata);
+}
+
 static void cxl_do_recovery(struct device *dev)
 {
+	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct cxl_port *port = NULL;
+
+	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
+	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
+		struct cxl_dport *dport;
+		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
+
+		port = rp_port;
+
+	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
+		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
+
+		port = us_port;
+
+	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
+		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
+		struct cxl_dev_state *cxlds;
+
+		if (!cxl_pci_drv_bound(pdev))
+			return;
+
+		cxlds = pci_get_drvdata(pdev);
+		port = cxlds->cxlmd->endpoint;
+	}
+
+	if (!port) {
+		dev_err(dev, "Failed to find the CXL device\n");
+		return;
+	}
+
+	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
+	if (status == PCI_ERS_RESULT_PANIC)
+		panic("CXL cachemem error.");
+
+	/*
+	 * If we have native control of AER, clear error status in the device
+	 * that detected the error.  If the platform retained control of AER,
+	 * it is responsible for clearing this status.  In that case, the
+	 * signaling device may not even be visible to the OS.
+	 */
+	if (cxl_error_is_native(pdev)) {
+		pcie_clear_device_status(pdev);
+		pci_aer_clear_nonfatal_status(pdev);
+		pci_aer_clear_fatal_status(pdev);
+	}
 }
 
 static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
-- 
2.34.1

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Cheatham, Benjamin 4 months, 1 week ago

On 9/25/2025 5:34 PM, Terry Bowman wrote:
> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
> handling. Follow similar design as found in PCIe error driver,
> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
> 
> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
> CXL ports instead. This will iterate through the CXL topology from the
> erroring device through the downstream CXL Ports and Endpoints.
> 
> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> 
> ---
> 
> Changes in v11->v12:
> - Cleaned up port discovery in cxl_do_recovery() (Dave)
> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
> 
> Changes in v10->v11:
> - pci_ers_merge_results() - Move to earlier patch
> ---
>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 111 insertions(+)
> 
> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> index 7e8d63c32d72..45f92defca64 100644
> --- a/drivers/cxl/core/ras.c
> +++ b/drivers/cxl/core/ras.c
> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>  
> +static int cxl_report_error_detected(struct device *dev, void *data)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);

Should probably check if dev_is_pci() first. I don't think it's an issue since
only ports under the root dport are being iterated, but I don't know if this would
trip up cxl_test.

> +	pci_ers_result_t vote, *result = data;
> +
> +	guard(device)(dev);
> +
> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
> +		if (!cxl_pci_drv_bound(pdev))
> +			return 0;
> +
> +		vote = cxl_error_detected(dev);
> +	} else {
> +		vote = cxl_port_error_detected(dev);
> +	}
> +
> +	*result = pci_ers_merge_result(*result, vote);
> +
> +	return 0;
> +}
> +
> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
> +{
> +	struct cxl_port *port;
> +
> +	if (!is_cxl_port(dev))
> +		return 0;
> +
> +	port = to_cxl_port(dev);
> +
> +	return port->parent_dport->dport_dev == dport_dev;
> +}
> +
> +static void cxl_walk_port(struct device *port_dev,
> +			  int (*cb)(struct device *, void *),
> +			  void *userdata)

This should just take a struct cxl_port * instead of device *. You always have the
struct cxl_port below and then pass in the &port->dev, only to just cast it back
to a struct cxl_port and never use the device pointer.

> +{
> +	struct cxl_dport *dport = NULL;
> +	struct cxl_port *port;
> +	unsigned long index;
> +
> +	if (!port_dev)
> +		return;
> +
> +	port = to_cxl_port(port_dev);
> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
> +		cb(port->uport_dev, userdata);
> +
> +	xa_for_each(&port->dports, index, dport)
> +	{
> +		struct device *child_port_dev __free(put_device) =
> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
> +					match_port_by_parent_dport);
> +
> +		cb(dport->dport_dev, userdata);
> +
> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
> +	}
> +
> +	if (is_cxl_endpoint(port))
> +		cb(port->uport_dev->parent, userdata);
> +}
> +
>  static void cxl_do_recovery(struct device *dev)
>  {
> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct cxl_port *port = NULL;
> +
> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
> +		struct cxl_dport *dport;
> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
> +
> +		port = rp_port;
> +
> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
> +
> +		port = us_port;
> +
> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
> +		struct cxl_dev_state *cxlds;
> +
> +		if (!cxl_pci_drv_bound(pdev))
> +			return;
> +
> +		cxlds = pci_get_drvdata(pdev);
> +		port = cxlds->cxlmd->endpoint;
> +	}
> +
> +	if (!port) {
> +		dev_err(dev, "Failed to find the CXL device\n");
> +		return;
> +	}
> +
> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
> +	if (status == PCI_ERS_RESULT_PANIC)
> +		panic("CXL cachemem error.");
> +
> +	/*
> +	 * If we have native control of AER, clear error status in the device
> +	 * that detected the error.  If the platform retained control of AER,
> +	 * it is responsible for clearing this status.  In that case, the
> +	 * signaling device may not even be visible to the OS.
> +	 */
> +	if (cxl_error_is_native(pdev)) {
> +		pcie_clear_device_status(pdev);
> +		pci_aer_clear_nonfatal_status(pdev);
> +		pci_aer_clear_fatal_status(pdev);
> +	}
>  }
>  
>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Dave Jiang 4 months, 1 week ago


On 9/25/25 3:34 PM, Terry Bowman wrote:
> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
> handling. Follow similar design as found in PCIe error driver,
> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
> 
> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
> CXL ports instead. This will iterate through the CXL topology from the
> erroring device through the downstream CXL Ports and Endpoints.
> 
> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> 
> ---
> 
> Changes in v11->v12:
> - Cleaned up port discovery in cxl_do_recovery() (Dave)
> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
> 
> Changes in v10->v11:
> - pci_ers_merge_results() - Move to earlier patch
> ---
>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 111 insertions(+)
> 
> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> index 7e8d63c32d72..45f92defca64 100644
> --- a/drivers/cxl/core/ras.c
> +++ b/drivers/cxl/core/ras.c
> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>  
> +static int cxl_report_error_detected(struct device *dev, void *data)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	pci_ers_result_t vote, *result = data;
> +
> +	guard(device)(dev);
> +
> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
> +		if (!cxl_pci_drv_bound(pdev))
> +			return 0;
> +
> +		vote = cxl_error_detected(dev);
> +	} else {
> +		vote = cxl_port_error_detected(dev);
> +	}
> +
> +	*result = pci_ers_merge_result(*result, vote);
> +
> +	return 0;
> +}
> +
> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
> +{
> +	struct cxl_port *port;
> +
> +	if (!is_cxl_port(dev))
> +		return 0;
> +
> +	port = to_cxl_port(dev);
> +
> +	return port->parent_dport->dport_dev == dport_dev;
> +}
> +
> +static void cxl_walk_port(struct device *port_dev,
> +			  int (*cb)(struct device *, void *),
> +			  void *userdata)
> +{
> +	struct cxl_dport *dport = NULL;
> +	struct cxl_port *port;
> +	unsigned long index;
> +
> +	if (!port_dev)
> +		return;
> +
> +	port = to_cxl_port(port_dev);
> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
> +		cb(port->uport_dev, userdata);

Could use some comments on what is being walked. Also an explanation of what is happening here would be good.

If this is an endpoint port, this would be the PCI endpoint device.
If it's a switch port, then this is the upstream port.
If it's a root port, this is skipped.

> +
> +	xa_for_each(&port->dports, index, dport)
> +	{
> +		struct device *child_port_dev __free(put_device) =
> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
> +					match_port_by_parent_dport);
> +
> +		cb(dport->dport_dev, userdata);

This is going through all the downstream ports
> +
> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
> +	}
> +
> +	if (is_cxl_endpoint(port))
> +		cb(port->uport_dev->parent, userdata);

And this is the downstream parent port of the endpoint device

Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.

So in the current implementation,
1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
3. Root port. It checks all the downstream ports for errors.
Is this the correct understanding of what this function does?

> +}
> +
>  static void cxl_do_recovery(struct device *dev)
>  {
> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct cxl_port *port = NULL;
> +
> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
> +		struct cxl_dport *dport;
> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
> +
> +		port = rp_port;
> +
> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
> +
> +		port = us_port;
> +
> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
> +		struct cxl_dev_state *cxlds;
> +
> +		if (!cxl_pci_drv_bound(pdev))
> +			return;

Need to have the pci dev lock before checking driver bound.

DJ
> +
> +		cxlds = pci_get_drvdata(pdev);
> +		port = cxlds->cxlmd->endpoint;
> +	}
> +
> +	if (!port) {
> +		dev_err(dev, "Failed to find the CXL device\n");
> +		return;
> +	}
> +
> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
> +	if (status == PCI_ERS_RESULT_PANIC)
> +		panic("CXL cachemem error.");
> +
> +	/*
> +	 * If we have native control of AER, clear error status in the device
> +	 * that detected the error.  If the platform retained control of AER,
> +	 * it is responsible for clearing this status.  In that case, the
> +	 * signaling device may not even be visible to the OS.
> +	 */
> +	if (cxl_error_is_native(pdev)) {
> +		pcie_clear_device_status(pdev);
> +		pci_aer_clear_nonfatal_status(pdev);
> +		pci_aer_clear_fatal_status(pdev);
> +	}
>  }
>  
>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Bowman, Terry 4 months, 1 week ago


On 9/29/2025 7:26 PM, Dave Jiang wrote:
>
> On 9/25/25 3:34 PM, Terry Bowman wrote:
>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>> handling. Follow similar design as found in PCIe error driver,
>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>
>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>> CXL ports instead. This will iterate through the CXL topology from the
>> erroring device through the downstream CXL Ports and Endpoints.
>>
>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>
>> ---
>>
>> Changes in v11->v12:
>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>
>> Changes in v10->v11:
>> - pci_ers_merge_results() - Move to earlier patch
>> ---
>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 111 insertions(+)
>>
>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>> index 7e8d63c32d72..45f92defca64 100644
>> --- a/drivers/cxl/core/ras.c
>> +++ b/drivers/cxl/core/ras.c
>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>  }
>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>  
>> +static int cxl_report_error_detected(struct device *dev, void *data)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(dev);
>> +	pci_ers_result_t vote, *result = data;
>> +
>> +	guard(device)(dev);
>> +
>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>> +		if (!cxl_pci_drv_bound(pdev))
>> +			return 0;
>> +
>> +		vote = cxl_error_detected(dev);
>> +	} else {
>> +		vote = cxl_port_error_detected(dev);
>> +	}
>> +
>> +	*result = pci_ers_merge_result(*result, vote);
>> +
>> +	return 0;
>> +}
>> +
>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>> +{
>> +	struct cxl_port *port;
>> +
>> +	if (!is_cxl_port(dev))
>> +		return 0;
>> +
>> +	port = to_cxl_port(dev);
>> +
>> +	return port->parent_dport->dport_dev == dport_dev;
>> +}
>> +
>> +static void cxl_walk_port(struct device *port_dev,
>> +			  int (*cb)(struct device *, void *),
>> +			  void *userdata)
>> +{
>> +	struct cxl_dport *dport = NULL;
>> +	struct cxl_port *port;
>> +	unsigned long index;
>> +
>> +	if (!port_dev)
>> +		return;
>> +
>> +	port = to_cxl_port(port_dev);
>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>> +		cb(port->uport_dev, userdata);
> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
Ok
> If this is an endpoint port, this would be the PCI endpoint device.
> If it's a switch port, then this is the upstream port.
> If it's a root port, this is skipped.
>
>> +
>> +	xa_for_each(&port->dports, index, dport)
>> +	{
>> +		struct device *child_port_dev __free(put_device) =
>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>> +					match_port_by_parent_dport);
>> +
>> +		cb(dport->dport_dev, userdata);
> This is going through all the downstream ports
>> +
>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>> +	}
>> +
>> +	if (is_cxl_endpoint(port))
>> +		cb(port->uport_dev->parent, userdata);
> And this is the downstream parent port of the endpoint device
>
> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
Sure, I'll change that.
> So in the current implementation,
> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
> 3. Root port. It checks all the downstream ports for errors.
> Is this the correct understanding of what this function does?

Yes. The ordering is different as you pointed out. I can move the endpoint 
check earlier with an early return. 
>> +}
>> +
>>  static void cxl_do_recovery(struct device *dev)
>>  {
>> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>> +	struct pci_dev *pdev = to_pci_dev(dev);
>> +	struct cxl_port *port = NULL;
>> +
>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
>> +		struct cxl_dport *dport;
>> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>> +
>> +		port = rp_port;
>> +
>> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
>> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>> +
>> +		port = us_port;
>> +
>> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>> +		struct cxl_dev_state *cxlds;
>> +
>> +		if (!cxl_pci_drv_bound(pdev))
>> +			return;
> Need to have the pci dev lock before checking driver bound.
> DJ

Ok, I'll try to add that into cxl_pci_drv_bound(). Terry
>> +
>> +		cxlds = pci_get_drvdata(pdev);
>> +		port = cxlds->cxlmd->endpoint;
>> +	}
>> +
>> +	if (!port) {
>> +		dev_err(dev, "Failed to find the CXL device\n");
>> +		return;
>> +	}
>> +
>> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
>> +	if (status == PCI_ERS_RESULT_PANIC)
>> +		panic("CXL cachemem error.");
>> +
>> +	/*
>> +	 * If we have native control of AER, clear error status in the device
>> +	 * that detected the error.  If the platform retained control of AER,
>> +	 * it is responsible for clearing this status.  In that case, the
>> +	 * signaling device may not even be visible to the OS.
>> +	 */
>> +	if (cxl_error_is_native(pdev)) {
>> +		pcie_clear_device_status(pdev);
>> +		pci_aer_clear_nonfatal_status(pdev);
>> +		pci_aer_clear_fatal_status(pdev);
>> +	}
>>  }
>>  
>>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Dave Jiang 4 months, 1 week ago


On 9/30/25 7:38 AM, Bowman, Terry wrote:
> 
> 
> On 9/29/2025 7:26 PM, Dave Jiang wrote:
>>
>> On 9/25/25 3:34 PM, Terry Bowman wrote:
>>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>>> handling. Follow similar design as found in PCIe error driver,
>>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>>
>>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>>> CXL ports instead. This will iterate through the CXL topology from the
>>> erroring device through the downstream CXL Ports and Endpoints.
>>>
>>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>>
>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>
>>> ---
>>>
>>> Changes in v11->v12:
>>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>>
>>> Changes in v10->v11:
>>> - pci_ers_merge_results() - Move to earlier patch
>>> ---
>>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 111 insertions(+)
>>>
>>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>>> index 7e8d63c32d72..45f92defca64 100644
>>> --- a/drivers/cxl/core/ras.c
>>> +++ b/drivers/cxl/core/ras.c
>>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>>  }
>>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>>  
>>> +static int cxl_report_error_detected(struct device *dev, void *data)
>>> +{
>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>> +	pci_ers_result_t vote, *result = data;
>>> +
>>> +	guard(device)(dev);
>>> +
>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>> +		if (!cxl_pci_drv_bound(pdev))
>>> +			return 0;
>>> +
>>> +		vote = cxl_error_detected(dev);
>>> +	} else {
>>> +		vote = cxl_port_error_detected(dev);
>>> +	}
>>> +
>>> +	*result = pci_ers_merge_result(*result, vote);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>>> +{
>>> +	struct cxl_port *port;
>>> +
>>> +	if (!is_cxl_port(dev))
>>> +		return 0;
>>> +
>>> +	port = to_cxl_port(dev);
>>> +
>>> +	return port->parent_dport->dport_dev == dport_dev;
>>> +}
>>> +
>>> +static void cxl_walk_port(struct device *port_dev,
>>> +			  int (*cb)(struct device *, void *),
>>> +			  void *userdata)
>>> +{
>>> +	struct cxl_dport *dport = NULL;
>>> +	struct cxl_port *port;
>>> +	unsigned long index;
>>> +
>>> +	if (!port_dev)
>>> +		return;
>>> +
>>> +	port = to_cxl_port(port_dev);
>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>> +		cb(port->uport_dev, userdata);
>> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
> Ok
>> If this is an endpoint port, this would be the PCI endpoint device.
>> If it's a switch port, then this is the upstream port.
>> If it's a root port, this is skipped.
>>
>>> +
>>> +	xa_for_each(&port->dports, index, dport)
>>> +	{
>>> +		struct device *child_port_dev __free(put_device) =
>>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>>> +					match_port_by_parent_dport);
>>> +
>>> +		cb(dport->dport_dev, userdata);
>> This is going through all the downstream ports
>>> +
>>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>>> +	}
>>> +
>>> +	if (is_cxl_endpoint(port))
>>> +		cb(port->uport_dev->parent, userdata);
>> And this is the downstream parent port of the endpoint device
>>
>> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
> Sure, I'll change that.
>> So in the current implementation,
>> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
>> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
>> 3. Root port. It checks all the downstream ports for errors.
>> Is this the correct understanding of what this function does?
> 
> Yes. The ordering is different as you pointed out. I can move the endpoint 
> check earlier with an early return. 

As the endpoint, what is the reason the check the parent dport? Pardon my ignorance.

>>> +}
>>> +
>>>  static void cxl_do_recovery(struct device *dev)
>>>  {
>>> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>> +	struct cxl_port *port = NULL;
>>> +
>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
>>> +		struct cxl_dport *dport;
>>> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>>> +
>>> +		port = rp_port;
>>> +
>>> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
>>> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>>> +
>>> +		port = us_port;
>>> +
>>> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>> +		struct cxl_dev_state *cxlds;
>>> +
>>> +		if (!cxl_pci_drv_bound(pdev))
>>> +			return;
>> Need to have the pci dev lock before checking driver bound.
>> DJ
> 
> Ok, I'll try to add that into cxl_pci_drv_bound(). Terry

Do you need the lock beyond just checking the driver data? Maybe do it outside cxl_pci_drv_bound(). I would have an assert in the function though to ensure lock is held when calling this function.

DJ
>>> +
>>> +		cxlds = pci_get_drvdata(pdev);
>>> +		port = cxlds->cxlmd->endpoint;
>>> +	}
>>> +
>>> +	if (!port) {
>>> +		dev_err(dev, "Failed to find the CXL device\n");
>>> +		return;
>>> +	}
>>> +
>>> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
>>> +	if (status == PCI_ERS_RESULT_PANIC)
>>> +		panic("CXL cachemem error.");
>>> +
>>> +	/*
>>> +	 * If we have native control of AER, clear error status in the device
>>> +	 * that detected the error.  If the platform retained control of AER,
>>> +	 * it is responsible for clearing this status.  In that case, the
>>> +	 * signaling device may not even be visible to the OS.
>>> +	 */
>>> +	if (cxl_error_is_native(pdev)) {
>>> +		pcie_clear_device_status(pdev);
>>> +		pci_aer_clear_nonfatal_status(pdev);
>>> +		pci_aer_clear_fatal_status(pdev);
>>> +	}
>>>  }
>>>  
>>>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
> 
>

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Bowman, Terry 4 months, 1 week ago


On 9/30/2025 11:13 AM, Dave Jiang wrote:
>
> On 9/30/25 7:38 AM, Bowman, Terry wrote:
>>
>> On 9/29/2025 7:26 PM, Dave Jiang wrote:
>>> On 9/25/25 3:34 PM, Terry Bowman wrote:
>>>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>>>> handling. Follow similar design as found in PCIe error driver,
>>>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>>>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>>>
>>>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>>>> CXL ports instead. This will iterate through the CXL topology from the
>>>> erroring device through the downstream CXL Ports and Endpoints.
>>>>
>>>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>>>
>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>>
>>>> ---
>>>>
>>>> Changes in v11->v12:
>>>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>>>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>>>
>>>> Changes in v10->v11:
>>>> - pci_ers_merge_results() - Move to earlier patch
>>>> ---
>>>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 111 insertions(+)
>>>>
>>>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>>>> index 7e8d63c32d72..45f92defca64 100644
>>>> --- a/drivers/cxl/core/ras.c
>>>> +++ b/drivers/cxl/core/ras.c
>>>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>>>  }
>>>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>>>  
>>>> +static int cxl_report_error_detected(struct device *dev, void *data)
>>>> +{
>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>> +	pci_ers_result_t vote, *result = data;
>>>> +
>>>> +	guard(device)(dev);
>>>> +
>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>> +			return 0;
>>>> +
>>>> +		vote = cxl_error_detected(dev);
>>>> +	} else {
>>>> +		vote = cxl_port_error_detected(dev);
>>>> +	}
>>>> +
>>>> +	*result = pci_ers_merge_result(*result, vote);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>>>> +{
>>>> +	struct cxl_port *port;
>>>> +
>>>> +	if (!is_cxl_port(dev))
>>>> +		return 0;
>>>> +
>>>> +	port = to_cxl_port(dev);
>>>> +
>>>> +	return port->parent_dport->dport_dev == dport_dev;
>>>> +}
>>>> +
>>>> +static void cxl_walk_port(struct device *port_dev,
>>>> +			  int (*cb)(struct device *, void *),
>>>> +			  void *userdata)
>>>> +{
>>>> +	struct cxl_dport *dport = NULL;
>>>> +	struct cxl_port *port;
>>>> +	unsigned long index;
>>>> +
>>>> +	if (!port_dev)
>>>> +		return;
>>>> +
>>>> +	port = to_cxl_port(port_dev);
>>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>>> +		cb(port->uport_dev, userdata);
>>> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
>> Ok
>>> If this is an endpoint port, this would be the PCI endpoint device.
>>> If it's a switch port, then this is the upstream port.
>>> If it's a root port, this is skipped.
>>>
>>>> +
>>>> +	xa_for_each(&port->dports, index, dport)
>>>> +	{
>>>> +		struct device *child_port_dev __free(put_device) =
>>>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>>>> +					match_port_by_parent_dport);
>>>> +
>>>> +		cb(dport->dport_dev, userdata);
>>> This is going through all the downstream ports
>>>> +
>>>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>>>> +	}
>>>> +
>>>> +	if (is_cxl_endpoint(port))
>>>> +		cb(port->uport_dev->parent, userdata);
>>> And this is the downstream parent port of the endpoint device
>>>
>>> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
>> Sure, I'll change that.
>>> So in the current implementation,
>>> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
>>> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
>>> 3. Root port. It checks all the downstream ports for errors.
>>> Is this the correct understanding of what this function does?
>> Yes. The ordering is different as you pointed out. I can move the endpoint 
>> check earlier with an early return. 
> As the endpoint, what is the reason the check the parent dport? Pardon my ignorance.

There is none. An endpoint port will not have downstream ports.  

>>>> +}
>>>> +
>>>>  static void cxl_do_recovery(struct device *dev)
>>>>  {
>>>> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>> +	struct cxl_port *port = NULL;
>>>> +
>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
>>>> +		struct cxl_dport *dport;
>>>> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>>>> +
>>>> +		port = rp_port;
>>>> +
>>>> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
>>>> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>>>> +
>>>> +		port = us_port;
>>>> +
>>>> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>> +		struct cxl_dev_state *cxlds;
>>>> +
>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>> +			return;
>>> Need to have the pci dev lock before checking driver bound.
>>> DJ
>> Ok, I'll try to add that into cxl_pci_drv_bound(). Terry
> Do you need the lock beyond just checking the driver data? Maybe do it outside cxl_pci_drv_bound(). I would have an assert in the function though to ensure lock is held when calling this function.

Ok, good idea.

Terry
> DJ
>>>> +
>>>> +		cxlds = pci_get_drvdata(pdev);
>>>> +		port = cxlds->cxlmd->endpoint;
>>>> +	}
>>>> +
>>>> +	if (!port) {
>>>> +		dev_err(dev, "Failed to find the CXL device\n");
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
>>>> +	if (status == PCI_ERS_RESULT_PANIC)
>>>> +		panic("CXL cachemem error.");
>>>> +
>>>> +	/*
>>>> +	 * If we have native control of AER, clear error status in the device
>>>> +	 * that detected the error.  If the platform retained control of AER,
>>>> +	 * it is responsible for clearing this status.  In that case, the
>>>> +	 * signaling device may not even be visible to the OS.
>>>> +	 */
>>>> +	if (cxl_error_is_native(pdev)) {
>>>> +		pcie_clear_device_status(pdev);
>>>> +		pci_aer_clear_nonfatal_status(pdev);
>>>> +		pci_aer_clear_fatal_status(pdev);
>>>> +	}
>>>>  }
>>>>  
>>>>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
>>

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Dave Jiang 4 months, 1 week ago


On 9/30/25 9:43 AM, Bowman, Terry wrote:
> 
> 
> On 9/30/2025 11:13 AM, Dave Jiang wrote:
>>
>> On 9/30/25 7:38 AM, Bowman, Terry wrote:
>>>
>>> On 9/29/2025 7:26 PM, Dave Jiang wrote:
>>>> On 9/25/25 3:34 PM, Terry Bowman wrote:
>>>>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>>>>> handling. Follow similar design as found in PCIe error driver,
>>>>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>>>>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>>>>
>>>>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>>>>> CXL ports instead. This will iterate through the CXL topology from the
>>>>> erroring device through the downstream CXL Ports and Endpoints.
>>>>>
>>>>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>>>>
>>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>>>
>>>>> ---
>>>>>
>>>>> Changes in v11->v12:
>>>>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>>>>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>>>>
>>>>> Changes in v10->v11:
>>>>> - pci_ers_merge_results() - Move to earlier patch
>>>>> ---
>>>>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>>>>  1 file changed, 111 insertions(+)
>>>>>
>>>>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>>>>> index 7e8d63c32d72..45f92defca64 100644
>>>>> --- a/drivers/cxl/core/ras.c
>>>>> +++ b/drivers/cxl/core/ras.c
>>>>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>>>>  }
>>>>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>>>>  
>>>>> +static int cxl_report_error_detected(struct device *dev, void *data)
>>>>> +{
>>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +	pci_ers_result_t vote, *result = data;
>>>>> +
>>>>> +	guard(device)(dev);
>>>>> +
>>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>>> +			return 0;
>>>>> +
>>>>> +		vote = cxl_error_detected(dev);
>>>>> +	} else {
>>>>> +		vote = cxl_port_error_detected(dev);
>>>>> +	}
>>>>> +
>>>>> +	*result = pci_ers_merge_result(*result, vote);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>>>>> +{
>>>>> +	struct cxl_port *port;
>>>>> +
>>>>> +	if (!is_cxl_port(dev))
>>>>> +		return 0;
>>>>> +
>>>>> +	port = to_cxl_port(dev);
>>>>> +
>>>>> +	return port->parent_dport->dport_dev == dport_dev;
>>>>> +}
>>>>> +
>>>>> +static void cxl_walk_port(struct device *port_dev,
>>>>> +			  int (*cb)(struct device *, void *),
>>>>> +			  void *userdata)
>>>>> +{
>>>>> +	struct cxl_dport *dport = NULL;
>>>>> +	struct cxl_port *port;
>>>>> +	unsigned long index;
>>>>> +
>>>>> +	if (!port_dev)
>>>>> +		return;
>>>>> +
>>>>> +	port = to_cxl_port(port_dev);
>>>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>>>> +		cb(port->uport_dev, userdata);
>>>> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
>>> Ok
>>>> If this is an endpoint port, this would be the PCI endpoint device.
>>>> If it's a switch port, then this is the upstream port.
>>>> If it's a root port, this is skipped.
>>>>
>>>>> +
>>>>> +	xa_for_each(&port->dports, index, dport)
>>>>> +	{
>>>>> +		struct device *child_port_dev __free(put_device) =
>>>>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>>>>> +					match_port_by_parent_dport);
>>>>> +
>>>>> +		cb(dport->dport_dev, userdata);
>>>> This is going through all the downstream ports
>>>>> +
>>>>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>>>>> +	}
>>>>> +
>>>>> +	if (is_cxl_endpoint(port))
>>>>> +		cb(port->uport_dev->parent, userdata);
>>>> And this is the downstream parent port of the endpoint device
>>>>
>>>> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
>>> Sure, I'll change that.
>>>> So in the current implementation,
>>>> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
>>>> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
>>>> 3. Root port. It checks all the downstream ports for errors.
>>>> Is this the correct understanding of what this function does?
>>> Yes. The ordering is different as you pointed out. I can move the endpoint 
>>> check earlier with an early return. 
>> As the endpoint, what is the reason the check the parent dport? Pardon my ignorance.
> 
> There is none. An endpoint port will not have downstream ports.

parent dport. It would be the root port or the switch downstream port. This is what the current code is doing:

>>>>> +	if (is_cxl_endpoint(port))
>>>>> +		cb(port->uport_dev->parent, userdata);

DJ

  
> 
>>>>> +}
>>>>> +
>>>>>  static void cxl_do_recovery(struct device *dev)
>>>>>  {
>>>>> +	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
>>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>>> +	struct cxl_port *port = NULL;
>>>>> +
>>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) ||
>>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM)) {
>>>>> +		struct cxl_dport *dport;
>>>>> +		struct cxl_port *rp_port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
>>>>> +
>>>>> +		port = rp_port;
>>>>> +
>>>>> +	} else	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_UPSTREAM) {
>>>>> +		struct cxl_port *us_port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
>>>>> +
>>>>> +		port = us_port;
>>>>> +
>>>>> +	} else	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>>> +		    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>>> +		struct cxl_dev_state *cxlds;
>>>>> +
>>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>>> +			return;
>>>> Need to have the pci dev lock before checking driver bound.
>>>> DJ
>>> Ok, I'll try to add that into cxl_pci_drv_bound(). Terry
>> Do you need the lock beyond just checking the driver data? Maybe do it outside cxl_pci_drv_bound(). I would have an assert in the function though to ensure lock is held when calling this function.
> 
> Ok, good idea.
> 
> Terry
>> DJ
>>>>> +
>>>>> +		cxlds = pci_get_drvdata(pdev);
>>>>> +		port = cxlds->cxlmd->endpoint;
>>>>> +	}
>>>>> +
>>>>> +	if (!port) {
>>>>> +		dev_err(dev, "Failed to find the CXL device\n");
>>>>> +		return;
>>>>> +	}
>>>>> +
>>>>> +	cxl_walk_port(&port->dev, cxl_report_error_detected, &status);
>>>>> +	if (status == PCI_ERS_RESULT_PANIC)
>>>>> +		panic("CXL cachemem error.");
>>>>> +
>>>>> +	/*
>>>>> +	 * If we have native control of AER, clear error status in the device
>>>>> +	 * that detected the error.  If the platform retained control of AER,
>>>>> +	 * it is responsible for clearing this status.  In that case, the
>>>>> +	 * signaling device may not even be visible to the OS.
>>>>> +	 */
>>>>> +	if (cxl_error_is_native(pdev)) {
>>>>> +		pcie_clear_device_status(pdev);
>>>>> +		pci_aer_clear_nonfatal_status(pdev);
>>>>> +		pci_aer_clear_fatal_status(pdev);
>>>>> +	}
>>>>>  }
>>>>>  
>>>>>  static void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base)
>>>
>

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Bowman, Terry 4 months, 1 week ago


On 9/30/2025 11:46 AM, Dave Jiang wrote:
>
> On 9/30/25 9:43 AM, Bowman, Terry wrote:
>>
>> On 9/30/2025 11:13 AM, Dave Jiang wrote:
>>> On 9/30/25 7:38 AM, Bowman, Terry wrote:
>>>> On 9/29/2025 7:26 PM, Dave Jiang wrote:
>>>>> On 9/25/25 3:34 PM, Terry Bowman wrote:
>>>>>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>>>>>> handling. Follow similar design as found in PCIe error driver,
>>>>>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>>>>>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>>>>>
>>>>>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>>>>>> CXL ports instead. This will iterate through the CXL topology from the
>>>>>> erroring device through the downstream CXL Ports and Endpoints.
>>>>>>
>>>>>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>>>>>
>>>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>>>>
>>>>>> ---
>>>>>>
>>>>>> Changes in v11->v12:
>>>>>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>>>>>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>>>>>
>>>>>> Changes in v10->v11:
>>>>>> - pci_ers_merge_results() - Move to earlier patch
>>>>>> ---
>>>>>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 111 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>>>>>> index 7e8d63c32d72..45f92defca64 100644
>>>>>> --- a/drivers/cxl/core/ras.c
>>>>>> +++ b/drivers/cxl/core/ras.c
>>>>>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>>>>>  }
>>>>>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>>>>>  
>>>>>> +static int cxl_report_error_detected(struct device *dev, void *data)
>>>>>> +{
>>>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>>>> +	pci_ers_result_t vote, *result = data;
>>>>>> +
>>>>>> +	guard(device)(dev);
>>>>>> +
>>>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>>>> +			return 0;
>>>>>> +
>>>>>> +		vote = cxl_error_detected(dev);
>>>>>> +	} else {
>>>>>> +		vote = cxl_port_error_detected(dev);
>>>>>> +	}
>>>>>> +
>>>>>> +	*result = pci_ers_merge_result(*result, vote);
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>>>>>> +{
>>>>>> +	struct cxl_port *port;
>>>>>> +
>>>>>> +	if (!is_cxl_port(dev))
>>>>>> +		return 0;
>>>>>> +
>>>>>> +	port = to_cxl_port(dev);
>>>>>> +
>>>>>> +	return port->parent_dport->dport_dev == dport_dev;
>>>>>> +}
>>>>>> +
>>>>>> +static void cxl_walk_port(struct device *port_dev,
>>>>>> +			  int (*cb)(struct device *, void *),
>>>>>> +			  void *userdata)
>>>>>> +{
>>>>>> +	struct cxl_dport *dport = NULL;
>>>>>> +	struct cxl_port *port;
>>>>>> +	unsigned long index;
>>>>>> +
>>>>>> +	if (!port_dev)
>>>>>> +		return;
>>>>>> +
>>>>>> +	port = to_cxl_port(port_dev);
>>>>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>>>>> +		cb(port->uport_dev, userdata);
>>>>> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
>>>> Ok
>>>>> If this is an endpoint port, this would be the PCI endpoint device.
>>>>> If it's a switch port, then this is the upstream port.
>>>>> If it's a root port, this is skipped.
>>>>>
>>>>>> +
>>>>>> +	xa_for_each(&port->dports, index, dport)
>>>>>> +	{
>>>>>> +		struct device *child_port_dev __free(put_device) =
>>>>>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>>>>>> +					match_port_by_parent_dport);
>>>>>> +
>>>>>> +		cb(dport->dport_dev, userdata);
>>>>> This is going through all the downstream ports
>>>>>> +
>>>>>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>>>>>> +	}
>>>>>> +
>>>>>> +	if (is_cxl_endpoint(port))
>>>>>> +		cb(port->uport_dev->parent, userdata);
>>>>> And this is the downstream parent port of the endpoint device
>>>>>
>>>>> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
>>>> Sure, I'll change that.
>>>>> So in the current implementation,
>>>>> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
>>>>> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
>>>>> 3. Root port. It checks all the downstream ports for errors.
>>>>> Is this the correct understanding of what this function does?
>>>> Yes. The ordering is different as you pointed out. I can move the endpoint 
>>>> check earlier with an early return. 
>>> As the endpoint, what is the reason the check the parent dport? Pardon my ignorance.
>> There is none. An endpoint port will not have downstream ports.
> parent dport. It would be the root port or the switch downstream port. This is what the current code is doing:
>
>>>>>> +	if (is_cxl_endpoint(port))
>>>>>> +		cb(port->uport_dev->parent, userdata);
> DJ
>
>   

Yes. I need to change port->uport_dev->parent to be port->uport_dev. Thanks. Terry

Re: [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery

Posted by Dave Jiang 4 months, 1 week ago


On 10/1/25 6:58 AM, Bowman, Terry wrote:
> 
> 
> On 9/30/2025 11:46 AM, Dave Jiang wrote:
>>
>> On 9/30/25 9:43 AM, Bowman, Terry wrote:
>>>
>>> On 9/30/2025 11:13 AM, Dave Jiang wrote:
>>>> On 9/30/25 7:38 AM, Bowman, Terry wrote:
>>>>> On 9/29/2025 7:26 PM, Dave Jiang wrote:
>>>>>> On 9/25/25 3:34 PM, Terry Bowman wrote:
>>>>>>> Populate the cxl_do_recovery() function with uncorrectable protocol error (UCE)
>>>>>>> handling. Follow similar design as found in PCIe error driver,
>>>>>>> pcie_do_recovery(). One difference is cxl_do_recovery() will treat all UCEs
>>>>>>> as fatal with a kernel panic. This is to prevent corruption on CXL memory.
>>>>>>>
>>>>>>> Introduce cxl_walk_port(). Make this analogous to pci_walk_bridge() but walking
>>>>>>> CXL ports instead. This will iterate through the CXL topology from the
>>>>>>> erroring device through the downstream CXL Ports and Endpoints.
>>>>>>>
>>>>>>> Export pci_aer_clear_fatal_status() for CXL to use if a UCE is not found.
>>>>>>>
>>>>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>>>>>
>>>>>>> ---
>>>>>>>
>>>>>>> Changes in v11->v12:
>>>>>>> - Cleaned up port discovery in cxl_do_recovery() (Dave)
>>>>>>> - Added PCI_EXP_TYPE_RC_END to type check in cxl_report_error_detected()
>>>>>>>
>>>>>>> Changes in v10->v11:
>>>>>>> - pci_ers_merge_results() - Move to earlier patch
>>>>>>> ---
>>>>>>>  drivers/cxl/core/ras.c | 111 +++++++++++++++++++++++++++++++++++++++++
>>>>>>>  1 file changed, 111 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
>>>>>>> index 7e8d63c32d72..45f92defca64 100644
>>>>>>> --- a/drivers/cxl/core/ras.c
>>>>>>> +++ b/drivers/cxl/core/ras.c
>>>>>>> @@ -443,8 +443,119 @@ void cxl_endpoint_port_init_ras(struct cxl_port *ep)
>>>>>>>  }
>>>>>>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_port_init_ras, "CXL");
>>>>>>>  
>>>>>>> +static int cxl_report_error_detected(struct device *dev, void *data)
>>>>>>> +{
>>>>>>> +	struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> +	pci_ers_result_t vote, *result = data;
>>>>>>> +
>>>>>>> +	guard(device)(dev);
>>>>>>> +
>>>>>>> +	if ((pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) ||
>>>>>>> +	    (pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END)) {
>>>>>>> +		if (!cxl_pci_drv_bound(pdev))
>>>>>>> +			return 0;
>>>>>>> +
>>>>>>> +		vote = cxl_error_detected(dev);
>>>>>>> +	} else {
>>>>>>> +		vote = cxl_port_error_detected(dev);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	*result = pci_ers_merge_result(*result, vote);
>>>>>>> +
>>>>>>> +	return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev)
>>>>>>> +{
>>>>>>> +	struct cxl_port *port;
>>>>>>> +
>>>>>>> +	if (!is_cxl_port(dev))
>>>>>>> +		return 0;
>>>>>>> +
>>>>>>> +	port = to_cxl_port(dev);
>>>>>>> +
>>>>>>> +	return port->parent_dport->dport_dev == dport_dev;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void cxl_walk_port(struct device *port_dev,
>>>>>>> +			  int (*cb)(struct device *, void *),
>>>>>>> +			  void *userdata)
>>>>>>> +{
>>>>>>> +	struct cxl_dport *dport = NULL;
>>>>>>> +	struct cxl_port *port;
>>>>>>> +	unsigned long index;
>>>>>>> +
>>>>>>> +	if (!port_dev)
>>>>>>> +		return;
>>>>>>> +
>>>>>>> +	port = to_cxl_port(port_dev);
>>>>>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>>>>>> +		cb(port->uport_dev, userdata);
>>>>>> Could use some comments on what is being walked. Also an explanation of what is happening here would be good.
>>>>> Ok
>>>>>> If this is an endpoint port, this would be the PCI endpoint device.
>>>>>> If it's a switch port, then this is the upstream port.
>>>>>> If it's a root port, this is skipped.
>>>>>>
>>>>>>> +
>>>>>>> +	xa_for_each(&port->dports, index, dport)
>>>>>>> +	{
>>>>>>> +		struct device *child_port_dev __free(put_device) =
>>>>>>> +			bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev,
>>>>>>> +					match_port_by_parent_dport);
>>>>>>> +
>>>>>>> +		cb(dport->dport_dev, userdata);
>>>>>> This is going through all the downstream ports
>>>>>>> +
>>>>>>> +		cxl_walk_port(child_port_dev, cxl_report_error_detected, userdata);
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	if (is_cxl_endpoint(port))
>>>>>>> +		cb(port->uport_dev->parent, userdata);
>>>>>> And this is the downstream parent port of the endpoint device
>>>>>>
>>>>>> Why not move this before the xa_for_each() and return early? endpoint ports don't have dports, no need to even try to run that block above.
>>>>> Sure, I'll change that.
>>>>>> So in the current implementation,
>>>>>> 1. Endpoint. It checks the device, and then it checks the downstream parent port for errors. Is checking the parent dport necessary?
>>>>>> 2. Switch. It checks the upstream port, then it checks all the downstream ports for errors.
>>>>>> 3. Root port. It checks all the downstream ports for errors.
>>>>>> Is this the correct understanding of what this function does?
>>>>> Yes. The ordering is different as you pointed out. I can move the endpoint 
>>>>> check earlier with an early return. 
>>>> As the endpoint, what is the reason the check the parent dport? Pardon my ignorance.
>>> There is none. An endpoint port will not have downstream ports.
>> parent dport. It would be the root port or the switch downstream port. This is what the current code is doing:
>>
>>>>>>> +	if (is_cxl_endpoint(port))
>>>>>>> +		cb(port->uport_dev->parent, userdata);
>> DJ
>>
>>   
> 
> Yes. I need to change port->uport_dev->parent to be port->uport_dev. Thanks. Terry

I believe your first chunk already covered the endpoint device:
>>>>>>> +	if (port->uport_dev && dev_is_pci(port->uport_dev))
>>>>>>> +		cb(port->uport_dev, userdata);

So you can probably just drop the last chunk entirely.