Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
from the CXL Virtual Hierarchy (VH) handling. This is because of the
differences in the RCH and VH topologies. Improve the maintainability and
add ability to enable/disable RCH handling.
Move and combine the RCH handling code into a single block conditionally
compiled with the CONFIG_CXL_RCH_RAS kernel config.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
v10->v11:
- New patch
---
drivers/cxl/core/ras.c | 175 +++++++++++++++++++++--------------------
1 file changed, 90 insertions(+), 85 deletions(-)
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 0875ce8116ff..f42f9a255ef8 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -126,6 +126,7 @@ void cxl_ras_exit(void)
cancel_work_sync(&cxl_cper_prot_err_work);
}
+#ifdef CONFIG_CXL_RCH_RAS
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
resource_size_t aer_phys;
@@ -141,18 +142,6 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
}
}
-static void cxl_dport_map_ras(struct cxl_dport *dport)
-{
- struct cxl_register_map *map = &dport->reg_map;
- struct device *dev = dport->dport_dev;
-
- if (!map->component_map.ras.valid)
- dev_dbg(dev, "RAS registers not found\n");
- else if (cxl_map_component_regs(map, &dport->regs.component,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
- dev_dbg(dev, "Failed to map RAS capability.\n");
-}
-
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
void __iomem *aer_base = dport->regs.dport_aer;
@@ -177,6 +166,95 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}
+/*
+ * Copy the AER capability registers using 32 bit read accesses.
+ * This is necessary because RCRB AER capability is MMIO mapped. Clear the
+ * status after copying.
+ *
+ * @aer_base: base address of AER capability block in RCRB
+ * @aer_regs: destination for copying AER capability
+ */
+static bool cxl_rch_get_aer_info(void __iomem *aer_base,
+ struct aer_capability_regs *aer_regs)
+{
+ int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
+ u32 *aer_regs_buf = (u32 *)aer_regs;
+ int n;
+
+ if (!aer_base)
+ return false;
+
+ /* Use readl() to guarantee 32-bit accesses */
+ for (n = 0; n < read_cnt; n++)
+ aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
+
+ writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
+ writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
+
+ return true;
+}
+
+/* Get AER severity. Return false if there is no error. */
+static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
+ int *severity)
+{
+ if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
+ if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
+ *severity = AER_FATAL;
+ else
+ *severity = AER_NONFATAL;
+ return true;
+ }
+
+ if (aer_regs->cor_status & ~aer_regs->cor_mask) {
+ *severity = AER_CORRECTABLE;
+ return true;
+ }
+
+ return false;
+}
+
+static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ struct aer_capability_regs aer_regs;
+ struct cxl_dport *dport;
+ int severity;
+
+ struct cxl_port *port __free(put_cxl_port) =
+ cxl_pci_find_port(pdev, &dport);
+ if (!port)
+ return;
+
+ if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
+ return;
+
+ if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
+ return;
+
+ pci_print_aer(pdev, severity, &aer_regs);
+ if (severity == AER_CORRECTABLE)
+ cxl_handle_cor_ras(cxlds, dport->regs.ras);
+ else
+ cxl_handle_ras(cxlds, dport->regs.ras);
+}
+#else
+static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
+static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
+static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
+#endif
+
+static void cxl_dport_map_ras(struct cxl_dport *dport)
+{
+ struct cxl_register_map *map = &dport->reg_map;
+ struct device *dev = dport->dport_dev;
+
+ if (!map->component_map.ras.valid)
+ dev_dbg(dev, "RAS registers not found\n");
+ else if (cxl_map_component_regs(map, &dport->regs.component,
+ BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ dev_dbg(dev, "Failed to map RAS capability.\n");
+}
/**
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
@@ -270,79 +348,6 @@ static bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
return true;
}
-/*
- * Copy the AER capability registers using 32 bit read accesses.
- * This is necessary because RCRB AER capability is MMIO mapped. Clear the
- * status after copying.
- *
- * @aer_base: base address of AER capability block in RCRB
- * @aer_regs: destination for copying AER capability
- */
-static bool cxl_rch_get_aer_info(void __iomem *aer_base,
- struct aer_capability_regs *aer_regs)
-{
- int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
- u32 *aer_regs_buf = (u32 *)aer_regs;
- int n;
-
- if (!aer_base)
- return false;
-
- /* Use readl() to guarantee 32-bit accesses */
- for (n = 0; n < read_cnt; n++)
- aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
-
- writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
- writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
-
- return true;
-}
-
-/* Get AER severity. Return false if there is no error. */
-static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
- int *severity)
-{
- if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
- if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
- *severity = AER_FATAL;
- else
- *severity = AER_NONFATAL;
- return true;
- }
-
- if (aer_regs->cor_status & ~aer_regs->cor_mask) {
- *severity = AER_CORRECTABLE;
- return true;
- }
-
- return false;
-}
-
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
-{
- struct pci_dev *pdev = to_pci_dev(cxlds->dev);
- struct aer_capability_regs aer_regs;
- struct cxl_dport *dport;
- int severity;
-
- struct cxl_port *port __free(put_cxl_port) =
- cxl_pci_find_port(pdev, &dport);
- if (!port)
- return;
-
- if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
- return;
-
- if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
- return;
-
- pci_print_aer(pdev, severity, &aer_regs);
- if (severity == AER_CORRECTABLE)
- cxl_handle_cor_ras(cxlds, dport->regs.ras);
- else
- cxl_handle_ras(cxlds, dport->regs.ras);
-}
-
void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
--
2.51.0.rc2.21.ge5ab6b3e5a
On Tue, 26 Aug 2025 20:35:20 -0500
Terry Bowman <terry.bowman@amd.com> wrote:
> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
> from the CXL Virtual Hierarchy (VH) handling. This is because of the
> differences in the RCH and VH topologies. Improve the maintainability and
> add ability to enable/disable RCH handling.
>
> Move and combine the RCH handling code into a single block conditionally
> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>
How painful to move this to a ras_rch.c file and conditionally compile that?
Would want to do that as some merged thing with patch 1 though, rather than
moving at least some of the code twice.
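Something like the following in drivers/cxl/core/Makefile would do it, assuming
the split-out file were named ras_rch.c (the file name and the unconditional
ras.o line are placeholders, not the current Makefile text):

  cxl_core-y                      += ras.o
  cxl_core-$(CONFIG_CXL_RCH_RAS)  += ras_rch.o

The static inline stubs under the #else would then move into a header so the
callers in ras.c stay unconditional.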
On 8/29/2025 10:33 AM, Jonathan Cameron wrote:
> On Tue, 26 Aug 2025 20:35:20 -0500
> Terry Bowman <terry.bowman@amd.com> wrote:
>
>> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
>> from the CXL Virtual Hierarchy (VH) handling. This is because of the
>> differences in the RCH and VH topologies. Improve the maintainability and
>> add ability to enable/disable RCH handling.
>>
>> Move and combine the RCH handling code into a single block conditionally
>> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>
> How painful to move this to a ras_rch.c file and conditionally compile that?
>
> Would want to do that as some merged thing with patch 1 though, rather than
> moving at least some of the code twice.
>
I don't see an issue and the effort would be a simple rework of patch 1 as you
mentioned. But it would drop the 'Reviewed-by' tags. Should we check with
others about this too?
Terry
On 9/11/25 10:48 AM, Bowman, Terry wrote:
>
>
> On 8/29/2025 10:33 AM, Jonathan Cameron wrote:
>> On Tue, 26 Aug 2025 20:35:20 -0500
>> Terry Bowman <terry.bowman@amd.com> wrote:
>>
>>> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
>>> from the CXL Virtual Hierarchy (VH) handling. This is because of the
>>> differences in the RCH and VH topologies. Improve the maintainability and
>>> add ability to enable/disable RCH handling.
>>>
>>> Move and combine the RCH handling code into a single block conditionally
>>> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>>>
>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>
>> How painful to move this to a ras_rch.c file and conditionally compile that?
>>
>> Would want to do that as some merged thing with patch 1 though, rather than
>> moving at least some of the code twice.
>>
>
> I don't see an issue and the effort would be a simple rework of patch 1 as you
> mentioned. But it would drop the 'Reviewed-by' tags. Should we check with
> others about this too?
I would go ahead and do it.
On 9/11/2025 2:41 PM, Dave Jiang wrote:
>
> On 9/11/25 10:48 AM, Bowman, Terry wrote:
>>
>> On 8/29/2025 10:33 AM, Jonathan Cameron wrote:
>>> On Tue, 26 Aug 2025 20:35:20 -0500
>>> Terry Bowman <terry.bowman@amd.com> wrote:
>>>
>>>> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
>>>> from the CXL Virtual Hierarchy (VH) handling. This is because of the
>>>> differences in the RCH and VH topologies. Improve the maintainability and
>>>> add ability to enable/disable RCH handling.
>>>>
>>>> Move and combine the RCH handling code into a single block conditionally
>>>> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>>>>
>>>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>>>>
>>> How painful to move this to a ras_rch.c file and conditionally compile that?
>>>
>>> Would want to do that as some merged thing with patch 1 though, rather than
>>> moving at least some of the code twice.
>>>
>> I don't see an issue and the effort would be a simple rework of patch 1 as you
>> mentioned. But it would drop the 'Reviewed-by' tags. Should we check with
>> others about this too?
> I would go ahead and do it.
>
Dan mentioned he prefers leaving it all together in drivers/cxl/ras.c.
https://lore.kernel.org/linux-cxl/68829db3d3e63_134cc710080@dwillia2-xfh.jf.intel.com.notmuch/
Terry
On 8/27/25 02:35, Terry Bowman wrote:
> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
> from the CXL Virtual Hierarchy (VH) handling. This is because of the
> differences in the RCH and VH topologies. Improve the maintainability and
> add ability to enable/disable RCH handling.
>
> Move and combine the RCH handling code into a single block conditionally
> compiled with the CONFIG_CXL_RCH_RAS kernel config.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>
> ---
> v10->v11:
> - New patch
> ---
> drivers/cxl/core/ras.c | 175 +++++++++++++++++++++--------------------
> 1 file changed, 90 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
> index 0875ce8116ff..f42f9a255ef8 100644
> --- a/drivers/cxl/core/ras.c
> +++ b/drivers/cxl/core/ras.c
> @@ -126,6 +126,7 @@ void cxl_ras_exit(void)
> cancel_work_sync(&cxl_cper_prot_err_work);
> }
>
> +#ifdef CONFIG_CXL_RCH_RAS
You are introducing CONFIG_CXL_RCH_RAS in the next patch. Is it correct
to use it here?
On 8/28/2025 3:57 AM, Alejandro Lucero Palau wrote:
> On 8/27/25 02:35, Terry Bowman wrote:
>
>> +#ifdef CONFIG_CXL_RCH_RAS
>
> You are introducing CONFIG_CXL_RCH_RAS in the next patch. Is it correct
> to use it here?
>

You are correct. I need to move the introduction of Kconfig's
CONFIG_CXL_RCH_RAS definition into this patch. Thanks.

Terry
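P.S. Roughly something like this, as a sketch only — the prompt, dependencies,
and help text here are placeholders, not the actual entry from the follow-up
patch:

  config CXL_RCH_RAS
          bool "Restricted CXL Host (RCH) protocol error handling"
          depends on CXL_PCI && PCIEAER
          help
            Enable protocol error (RAS) handling for Restricted CXL Host
            (RCH) topologies, where the downstream port AER and RAS
            registers are MMIO mapped through the RCRB instead of being
            reachable via standard PCI config accesses.

With the option disabled, the RCH helpers compile down to the empty static
inline stubs, so VH-only configurations carry none of this code.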