Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
from the CXL Virtual Hierarchy (VH) handling because of differences in the
RCH and VH topologies. Improve maintainability and add the ability to
enable/disable RCH handling.

Move and combine the RCH handling code into a single block conditionally
compiled under the CONFIG_CXL_RCH_RAS kernel config option.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
v10->v11:
- New patch
---
drivers/cxl/core/ras.c | 175 +++++++++++++++++++++--------------------
1 file changed, 90 insertions(+), 85 deletions(-)
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 0875ce8116ff..f42f9a255ef8 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -126,6 +126,7 @@ void cxl_ras_exit(void)
cancel_work_sync(&cxl_cper_prot_err_work);
}
+#ifdef CONFIG_CXL_RCH_RAS
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
resource_size_t aer_phys;
@@ -141,18 +142,6 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
}
}
-static void cxl_dport_map_ras(struct cxl_dport *dport)
-{
- struct cxl_register_map *map = &dport->reg_map;
- struct device *dev = dport->dport_dev;
-
- if (!map->component_map.ras.valid)
- dev_dbg(dev, "RAS registers not found\n");
- else if (cxl_map_component_regs(map, &dport->regs.component,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
- dev_dbg(dev, "Failed to map RAS capability.\n");
-}
-
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
void __iomem *aer_base = dport->regs.dport_aer;
@@ -177,6 +166,95 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}
+/*
+ * Copy the AER capability registers using 32 bit read accesses.
+ * This is necessary because RCRB AER capability is MMIO mapped. Clear the
+ * status after copying.
+ *
+ * @aer_base: base address of AER capability block in RCRB
+ * @aer_regs: destination for copying AER capability
+ */
+static bool cxl_rch_get_aer_info(void __iomem *aer_base,
+ struct aer_capability_regs *aer_regs)
+{
+ int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
+ u32 *aer_regs_buf = (u32 *)aer_regs;
+ int n;
+
+ if (!aer_base)
+ return false;
+
+ /* Use readl() to guarantee 32-bit accesses */
+ for (n = 0; n < read_cnt; n++)
+ aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
+
+ writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
+ writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
+
+ return true;
+}
+
+/* Get AER severity. Return false if there is no error. */
+static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
+ int *severity)
+{
+ if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
+ if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
+ *severity = AER_FATAL;
+ else
+ *severity = AER_NONFATAL;
+ return true;
+ }
+
+ if (aer_regs->cor_status & ~aer_regs->cor_mask) {
+ *severity = AER_CORRECTABLE;
+ return true;
+ }
+
+ return false;
+}
+
+static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ struct aer_capability_regs aer_regs;
+ struct cxl_dport *dport;
+ int severity;
+
+ struct cxl_port *port __free(put_cxl_port) =
+ cxl_pci_find_port(pdev, &dport);
+ if (!port)
+ return;
+
+ if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
+ return;
+
+ if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
+ return;
+
+ pci_print_aer(pdev, severity, &aer_regs);
+ if (severity == AER_CORRECTABLE)
+ cxl_handle_cor_ras(cxlds, dport->regs.ras);
+ else
+ cxl_handle_ras(cxlds, dport->regs.ras);
+}
+#else
+static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
+static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
+static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
+#endif
+
+static void cxl_dport_map_ras(struct cxl_dport *dport)
+{
+ struct cxl_register_map *map = &dport->reg_map;
+ struct device *dev = dport->dport_dev;
+
+ if (!map->component_map.ras.valid)
+ dev_dbg(dev, "RAS registers not found\n");
+ else if (cxl_map_component_regs(map, &dport->regs.component,
+ BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ dev_dbg(dev, "Failed to map RAS capability.\n");
+}
/**
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
@@ -270,79 +348,6 @@ static bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
return true;
}
-/*
- * Copy the AER capability registers using 32 bit read accesses.
- * This is necessary because RCRB AER capability is MMIO mapped. Clear the
- * status after copying.
- *
- * @aer_base: base address of AER capability block in RCRB
- * @aer_regs: destination for copying AER capability
- */
-static bool cxl_rch_get_aer_info(void __iomem *aer_base,
- struct aer_capability_regs *aer_regs)
-{
- int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
- u32 *aer_regs_buf = (u32 *)aer_regs;
- int n;
-
- if (!aer_base)
- return false;
-
- /* Use readl() to guarantee 32-bit accesses */
- for (n = 0; n < read_cnt; n++)
- aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
-
- writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
- writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
-
- return true;
-}
-
-/* Get AER severity. Return false if there is no error. */
-static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
- int *severity)
-{
- if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
- if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
- *severity = AER_FATAL;
- else
- *severity = AER_NONFATAL;
- return true;
- }
-
- if (aer_regs->cor_status & ~aer_regs->cor_mask) {
- *severity = AER_CORRECTABLE;
- return true;
- }
-
- return false;
-}
-
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
-{
- struct pci_dev *pdev = to_pci_dev(cxlds->dev);
- struct aer_capability_regs aer_regs;
- struct cxl_dport *dport;
- int severity;
-
- struct cxl_port *port __free(put_cxl_port) =
- cxl_pci_find_port(pdev, &dport);
- if (!port)
- return;
-
- if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
- return;
-
- if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
- return;
-
- pci_print_aer(pdev, severity, &aer_regs);
- if (severity == AER_CORRECTABLE)
- cxl_handle_cor_ras(cxlds, dport->regs.ras);
- else
- cxl_handle_ras(cxlds, dport->regs.ras);
-}
-
void cxl_cor_error_detected(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
--
2.51.0.rc2.21.ge5ab6b3e5a
On Tue, 26 Aug 2025 20:35:20 -0500 Terry Bowman <terry.bowman@amd.com> wrote:

> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
> from the CXL Virtual Hierarchy (VH) handling because of differences in the
> RCH and VH topologies. Improve maintainability and add the ability to
> enable/disable RCH handling.
>
> Move and combine the RCH handling code into a single block conditionally
> compiled under the CONFIG_CXL_RCH_RAS kernel config option.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>

How painful to move this to a ras_rch.c file and conditionally compile that?

Would want to do that as some merged thing with patch 1 though, rather than
moving at least some of the code twice.

[snip]
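For concreteness, a minimal kbuild sketch of Jonathan's suggestion, assuming a
hypothetical drivers/cxl/core/ras_rch.c carved out of ras.c (the file name and
the exact split are illustrative here, not part of the posted series):

	# drivers/cxl/core/Makefile (sketch; ras_rch.c is hypothetical)
	cxl_core-y                     += ras.o
	cxl_core-$(CONFIG_CXL_RCH_RAS) += ras_rch.o

With the RCH-only functions in their own translation unit, the #else stubs in
the patch above would move to a shared header as static inlines for
CONFIG_CXL_RCH_RAS=n builds, so callers stay branch-free either way.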
On 8/29/2025 10:33 AM, Jonathan Cameron wrote:
> On Tue, 26 Aug 2025 20:35:20 -0500 Terry Bowman <terry.bowman@amd.com> wrote:
>
>> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
>> from the CXL Virtual Hierarchy (VH) handling because of differences in the
>> RCH and VH topologies. Improve maintainability and add the ability to
>> enable/disable RCH handling.
>>
>> Move and combine the RCH handling code into a single block conditionally
>> compiled under the CONFIG_CXL_RCH_RAS kernel config option.
>
> How painful to move this to a ras_rch.c file and conditionally compile that?
>
> Would want to do that as some merged thing with patch 1 though, rather than
> moving at least some of the code twice.

I don't see an issue and the effort would be a simple rework of patch 1 as you
mentioned. But it would drop the 'reviewed-by' sign-offs. Should we check with
others about this too?

Terry

[snip]
On 9/11/25 10:48 AM, Bowman, Terry wrote:
> On 8/29/2025 10:33 AM, Jonathan Cameron wrote:
>> How painful to move this to a ras_rch.c file and conditionally compile
>> that?
>>
>> Would want to do that as some merged thing with patch 1 though, rather than
>> moving at least some of the code twice.
>
> I don't see an issue and the effort would be a simple rework of patch 1 as
> you mentioned. But it would drop the 'reviewed-by' sign-offs. Should we
> check with others about this too?

I would go ahead and do it.

[snip]
On 9/11/2025 2:41 PM, Dave Jiang wrote:
> On 9/11/25 10:48 AM, Bowman, Terry wrote:
>> I don't see an issue and the effort would be a simple rework of patch 1 as
>> you mentioned. But it would drop the 'reviewed-by' sign-offs. Should we
>> check with others about this too?
>
> I would go ahead and do it.

Dan mentioned he prefers leaving it all together in drivers/cxl/ras.c.

https://lore.kernel.org/linux-cxl/68829db3d3e63_134cc710080@dwillia2-xfh.jf.intel.com.notmuch/

Terry

[snip]
On 8/27/25 02:35, Terry Bowman wrote:
> Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
> from the CXL Virtual Hierarchy (VH) handling because of differences in the
> RCH and VH topologies. Improve maintainability and add the ability to
> enable/disable RCH handling.
>
> Move and combine the RCH handling code into a single block conditionally
> compiled under the CONFIG_CXL_RCH_RAS kernel config option.
>
> [snip]
>
> +#ifdef CONFIG_CXL_RCH_RAS

You are introducing CONFIG_CXL_RCH_RAS in the next patch. Is it correct
to use it here?

[snip]
On 8/28/2025 3:57 AM, Alejandro Lucero Palau wrote:
> On 8/27/25 02:35, Terry Bowman wrote:
>> [snip]
>>
>> +#ifdef CONFIG_CXL_RCH_RAS
>
> You are introducing CONFIG_CXL_RCH_RAS in the next patch. Is it correct
> to use it here?

You are correct. I need to move the introduction of Kconfig's
CONFIG_CXL_RCH_RAS definition into this patch. Thanks.

Terry
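For reference, a minimal Kconfig sketch of the option being discussed,
assuming it lands in drivers/cxl/Kconfig alongside this patch (the prompt
text, dependency, and default below are assumptions, not the posted
definition):

	# drivers/cxl/Kconfig (sketch; depends/default are assumptions)
	config CXL_RCH_RAS
		bool "Restricted CXL Host (RCH) protocol error handling"
		depends on CXL_PCI
		default y
		help
		  Handle CXL protocol errors for devices attached in a
		  Restricted CXL Host (RCH) topology, where the AER
		  capability is MMIO-mapped in the RCRB rather than in
		  PCI config space.

With such an option defined in this patch, the #ifdef CONFIG_CXL_RCH_RAS
block above compiles the RCH handlers when enabled and falls back to the
static inline stubs when disabled.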