RAS registers are not mapped for CXL root ports, CXL downstream switch
ports, or CXL upstream switch ports. To prepare for future RAS logging
and handling, the driver needs updating to map PCIe port RAS registers.
Refactor and rename cxl_setup_parent_dport() to be cxl_init_ep_ports_aer().
Update the function such that it will iterate an endpoint's dports to map
the RAS registers.
Rename cxl_dport_map_regs() to be cxl_dport_init_aer(). The new
function name is a more accurate description of the function's work.
This update should also include checking for previously mapped registers
within the topology, particularly with CXL switches. Endpoints under a
CXL switch may share a common downstream and upstream port, ensure that
the registers are only mapped once.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
drivers/cxl/core/pci.c | 37 ++++++++++++++++---------------------
drivers/cxl/cxl.h | 7 ++++---
drivers/cxl/mem.c | 27 +++++++++++++++++++++++++--
3 files changed, 45 insertions(+), 26 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 51132a575b27..6f7bcdb389bf 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -787,21 +787,6 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
dport->regs.dport_aer = dport_aer;
}
-static void cxl_dport_map_regs(struct cxl_dport *dport)
-{
- struct cxl_register_map *map = &dport->reg_map;
- struct device *dev = dport->dport_dev;
-
- if (!map->component_map.ras.valid)
- dev_dbg(dev, "RAS registers not found\n");
- else if (cxl_map_component_regs(map, &dport->regs.component,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
- dev_dbg(dev, "Failed to map RAS capability.\n");
-
- if (dport->rch)
- cxl_dport_map_rch_aer(dport);
-}
-
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
void __iomem *aer_base = dport->regs.dport_aer;
@@ -831,7 +816,7 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
}
}
-void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
+void cxl_dport_init_aer(struct cxl_dport *dport)
{
struct device *dport_dev = dport->dport_dev;
@@ -840,15 +825,25 @@ void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
if (host_bridge->native_aer)
dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base);
+
+ cxl_dport_map_rch_aer(dport);
+ cxl_disable_rch_root_ints(dport);
}
- dport->reg_map.host = host;
- cxl_dport_map_regs(dport);
+ /* dport may have more than 1 downstream EP. Check if already mapped. */
+ if (dport->regs.ras) {
+ dev_warn(dport_dev, "RAS is already mapped\n");
+ return;
+ }
- if (dport->rch)
- cxl_disable_rch_root_ints(dport);
+ dport->reg_map.host = dport_dev;
+ if (cxl_map_component_regs(&dport->reg_map, &dport->regs.component,
+ BIT(CXL_CM_CAP_CAP_ID_RAS))) {
+ dev_err(dport_dev, "Failed to map RAS capability.\n");
+ return;
+ }
}
-EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);
+EXPORT_SYMBOL_NS_GPL(cxl_dport_init_aer, CXL);
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 9afb407d438f..cb9e05e2912b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -592,6 +592,7 @@ struct cxl_dax_region {
* @parent_dport: dport that points to this port in the parent
* @decoder_ida: allocator for decoder ids
* @reg_map: component and ras register mapping parameters
+ * @uport_regs: mapped component registers
* @nr_dports: number of entries in @dports
* @hdm_end: track last allocated HDM decoder instance for allocation ordering
* @commit_end: cursor to track highest committed decoder for commit ordering
@@ -612,6 +613,7 @@ struct cxl_port {
struct cxl_dport *parent_dport;
struct ida decoder_ida;
struct cxl_register_map reg_map;
+ struct cxl_component_regs uport_regs;
int nr_dports;
int hdm_end;
int commit_end;
@@ -761,10 +763,9 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
resource_size_t rcrb);
#ifdef CONFIG_PCIEAER_CXL
-void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
+void cxl_dport_init_aer(struct cxl_dport *dport);
#else
-static inline void cxl_setup_parent_dport(struct device *host,
- struct cxl_dport *dport) { }
+static inline void cxl_dport_init_aer(struct cxl_dport *dport) { }
#endif
struct cxl_decoder *to_cxl_decoder(struct device *dev);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 7de232eaeb17..b7204f010785 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -45,6 +45,30 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
return 0;
}
+static bool dev_is_cxl_pci(struct device *dev, u32 pcie_type)
+{
+ struct pci_dev *pdev;
+
+ if (!dev_is_pci(dev))
+ return false;
+
+ pdev = to_pci_dev(dev);
+ if (pci_pcie_type(pdev) != pcie_type)
+ return false;
+
+ return pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+ CXL_DVSEC_REG_LOCATOR);
+}
+
+static void cxl_init_ep_ports_aer(struct cxl_ep *ep)
+{
+ struct cxl_dport *dport = ep->dport;
+
+ if (dev_is_cxl_pci(dport->dport_dev, PCI_EXP_TYPE_DOWNSTREAM) ||
+ dev_is_cxl_pci(dport->dport_dev, PCI_EXP_TYPE_ROOT_PORT))
+ cxl_dport_init_aer(dport);
+}
+
static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
struct cxl_dport *parent_dport)
{
@@ -62,6 +86,7 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
ep = cxl_ep_load(iter, cxlmd);
ep->next = down;
+ cxl_init_ep_ports_aer(ep);
}
/* Note: endpoint port component registers are derived from @cxlds */
@@ -166,8 +191,6 @@ static int cxl_mem_probe(struct device *dev)
else
endpoint_parent = &parent_port->dev;
- cxl_setup_parent_dport(dev, dport);
-
device_lock(endpoint_parent);
if (!endpoint_parent->driver) {
dev_err(dev, "CXL port topology %s not enabled\n",
--
2.34.1
On Tue, 8 Oct 2024 17:16:51 -0500
Terry Bowman <terry.bowman@amd.com> wrote:
> RAS registers are not mapped for CXL root ports, CXL downstream switch
> ports, or CXL upstream switch ports. To prepare for future RAS logging
> and handling, the driver needs updating to map PCIe port RAS registers.
Given that the upstream port is in the next patch, I'd just mention that you
are adding mapping of RP and DSP here (this confused me before I noticed
the next patch).
>
> Refactor and rename cxl_setup_parent_dport() to be cxl_init_ep_ports_aer().
> Update the function such that it will iterate an endpoint's dports to map
> the RAS registers.
>
> Rename cxl_dport_map_regs() to be cxl_dport_init_aer(). The new
> function name is a more accurate description of the function's work.
>
> This update should also include checking for previously mapped registers
> within the topology, particularly with CXL switches. Endpoints under a
> CXL switch may share a common downstream and upstream port, ensure that
> the registers are only mapped once.
I don't understand why we need to do this for the ras registers but
it doesn't apply for HDM decoders for instance? Why can't
we map these registers in cxl_port_probe()?
End of day here, so maybe I'm completely misunderstanding this.
Will take another look tomorrow morning.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
> drivers/cxl/core/pci.c | 37 ++++++++++++++++---------------------
> drivers/cxl/cxl.h | 7 ++++---
> drivers/cxl/mem.c | 27 +++++++++++++++++++++++++--
> 3 files changed, 45 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 51132a575b27..6f7bcdb389bf 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -787,21 +787,6 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
> dport->regs.dport_aer = dport_aer;
> }
>
> -static void cxl_dport_map_regs(struct cxl_dport *dport)
> -{
> - struct cxl_register_map *map = &dport->reg_map;
> - struct device *dev = dport->dport_dev;
> -
> - if (!map->component_map.ras.valid)
> - dev_dbg(dev, "RAS registers not found\n");
> - else if (cxl_map_component_regs(map, &dport->regs.component,
> - BIT(CXL_CM_CAP_CAP_ID_RAS)))
> - dev_dbg(dev, "Failed to map RAS capability.\n");
> -
> - if (dport->rch)
> - cxl_dport_map_rch_aer(dport);
> -}
> -
> static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
> {
> void __iomem *aer_base = dport->regs.dport_aer;
> @@ -831,7 +816,7 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
> }
> }
>
> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
> +void cxl_dport_init_aer(struct cxl_dport *dport)
> {
> struct device *dport_dev = dport->dport_dev;
>
> @@ -840,15 +825,25 @@ void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
>
> if (host_bridge->native_aer)
> dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base);
> +
> + cxl_dport_map_rch_aer(dport);
> + cxl_disable_rch_root_ints(dport);
> }
>
> - dport->reg_map.host = host;
> - cxl_dport_map_regs(dport);
> + /* dport may have more than 1 downstream EP. Check if already mapped. */
> + if (dport->regs.ras) {
> + dev_warn(dport_dev, "RAS is already mapped\n");
This is valid. Why are we warning?
However, why do we need this dance here but not for other
root port registers etc.?
> + return;
> + }
>
> - if (dport->rch)
> - cxl_disable_rch_root_ints(dport);
> + dport->reg_map.host = dport_dev;
> + if (cxl_map_component_regs(&dport->reg_map, &dport->regs.component,
> + BIT(CXL_CM_CAP_CAP_ID_RAS))) {
> + dev_err(dport_dev, "Failed to map RAS capability.\n");
> + return;
> + }
> }
> -EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL);
> +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_aer, CXL);
>
> static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
> struct cxl_dport *dport)
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 9afb407d438f..cb9e05e2912b 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -592,6 +592,7 @@ struct cxl_dax_region {
> * @parent_dport: dport that points to this port in the parent
> * @decoder_ida: allocator for decoder ids
> * @reg_map: component and ras register mapping parameters
> + * @uport_regs: mapped component registers
> * @nr_dports: number of entries in @dports
> * @hdm_end: track last allocated HDM decoder instance for allocation ordering
> * @commit_end: cursor to track highest committed decoder for commit ordering
> @@ -612,6 +613,7 @@ struct cxl_port {
> struct cxl_dport *parent_dport;
> struct ida decoder_ida;
> struct cxl_register_map reg_map;
> + struct cxl_component_regs uport_regs;
> int nr_dports;
> int hdm_end;
> int commit_end;
> @@ -761,10 +763,9 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
> resource_size_t rcrb);
>
> #ifdef CONFIG_PCIEAER_CXL
> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
> +void cxl_dport_init_aer(struct cxl_dport *dport);
> #else
> -static inline void cxl_setup_parent_dport(struct device *host,
> - struct cxl_dport *dport) { }
> +static inline void cxl_dport_init_aer(struct cxl_dport *dport) { }
> #endif
>
> struct cxl_decoder *to_cxl_decoder(struct device *dev);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 7de232eaeb17..b7204f010785 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -45,6 +45,30 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
> return 0;
> }
>
> +static bool dev_is_cxl_pci(struct device *dev, u32 pcie_type)
> +{
> + struct pci_dev *pdev;
> +
> + if (!dev_is_pci(dev))
> + return false;
> +
> + pdev = to_pci_dev(dev);
> + if (pci_pcie_type(pdev) != pcie_type)
> + return false;
> +
> + return pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + CXL_DVSEC_REG_LOCATOR);
> +}
> +
> +static void cxl_init_ep_ports_aer(struct cxl_ep *ep)
> +{
> + struct cxl_dport *dport = ep->dport;
> +
> + if (dev_is_cxl_pci(dport->dport_dev, PCI_EXP_TYPE_DOWNSTREAM) ||
> + dev_is_cxl_pci(dport->dport_dev, PCI_EXP_TYPE_ROOT_PORT))
> + cxl_dport_init_aer(dport);
> +}
> +
> static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
> struct cxl_dport *parent_dport)
> {
> @@ -62,6 +86,7 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
>
> ep = cxl_ep_load(iter, cxlmd);
> ep->next = down;
> + cxl_init_ep_ports_aer(ep);
> }
>
> /* Note: endpoint port component registers are derived from @cxlds */
> @@ -166,8 +191,6 @@ static int cxl_mem_probe(struct device *dev)
> else
> endpoint_parent = &parent_port->dev;
>
> - cxl_setup_parent_dport(dev, dport);
> -
> device_lock(endpoint_parent);
> if (!endpoint_parent->driver) {
> dev_err(dev, "CXL port topology %s not enabled\n",
Hi Jonathan,

On 10/16/24 12:14, Jonathan Cameron wrote:
> On Tue, 8 Oct 2024 17:16:51 -0500
> Terry Bowman <terry.bowman@amd.com> wrote:
>
>> RAS registers are not mapped for CXL root ports, CXL downstream switch
>> ports, or CXL upstream switch ports. To prepare for future RAS logging
>> and handling, the driver needs updating to map PCIe port RAS registers.
>
> Give the upstream port is in next patch, I'd just mention that you
> are adding mapping of RP and DSP here (This confused me before I noticed
> the next patch).

Ok. Good point,

>>
>> Refactor and rename cxl_setup_parent_dport() to be cxl_init_ep_ports_aer().
>> Update the function such that it will iterate an endpoint's dports to map
>> the RAS registers.
>>
>> Rename cxl_dport_map_regs() to be cxl_dport_init_aer(). The new
>> function name is a more accurate description of the function's work.
>>
>> This update should also include checking for previously mapped registers
>> within the topology, particularly with CXL switches. Endpoints under a
>> CXL switch may share a common downstream and upstream port, ensure that
>> the registers are only mapped once.
>
> I don't understand why we need to do this for the ras registers but
> it doesn't apply for HDM decoders for instance? Why can't
> we map these registers in cxl_port_probe()?
>

We have seen downstream root ports with DVSECs that are not fully populated
immediately after booting. The plan here was to push out the RAS register
block mapping until as late as possible, in the memdev driver.

> End of day here, so maybe I'm completely misunderstanding this.
> Will take another look tomorrow morning.
>

Thanks for your reviews.

Regards,
Terry
On Wed, 16 Oct 2024 13:16:34 -0500
Terry Bowman <Terry.Bowman@amd.com> wrote:

> Hi Jonathan,
>
> On 10/16/24 12:14, Jonathan Cameron wrote:
> > On Tue, 8 Oct 2024 17:16:51 -0500
> > Terry Bowman <terry.bowman@amd.com> wrote:
> >
> >> RAS registers are not mapped for CXL root ports, CXL downstream switch
> >> ports, or CXL upstream switch ports. To prepare for future RAS logging
> >> and handling, the driver needs updating to map PCIe port RAS registers.
> >
> > Give the upstream port is in next patch, I'd just mention that you
> > are adding mapping of RP and DSP here (This confused me before I noticed
> > the next patch).
>
> Ok. Good point,
>
> >>
> >> Refactor and rename cxl_setup_parent_dport() to be cxl_init_ep_ports_aer().
> >> Update the function such that it will iterate an endpoint's dports to map
> >> the RAS registers.
> >>
> >> Rename cxl_dport_map_regs() to be cxl_dport_init_aer(). The new
> >> function name is a more accurate description of the function's work.
> >>
> >> This update should also include checking for previously mapped registers
> >> within the topology, particularly with CXL switches. Endpoints under a
> >> CXL switch may share a common downstream and upstream port, ensure that
> >> the registers are only mapped once.
> >
> > I don't understand why we need to do this for the ras registers but
> > it doesn't apply for HDM decoders for instance? Why can't
> > we map these registers in cxl_port_probe()?
> >
>
> We have seen downstream root ports with DVSECs that are not fully populated
> immediately after booting. The plan here was to push out the RAS register
> block mapping until as late as possible, in the memdev driver.

That needs debugging because simply pushing it later like this is
only going to make the race harder to hit unless we understand the
'why' of that. If there is a reason to delay, my gut feeling would
be to delay the cxl_port_probe() until things are stable rather
than just trying this a bit later.

This might be the whole link must train before CXL registers are
presented thing (a less than ideal corner of the CXL spec) but not
sure it would mean they weren't available in cxl_port_probe()

Jonathan

>
> > End of day here, so maybe I'm completely misunderstanding this.
> > Will take another look tomorrow morning.
> >
>
> Thanks for your reviews.
>
> Regards,
> Terry
>
Hi Jonathan,

On 10/17/2024 8:50 AM, Jonathan Cameron wrote:
> On Wed, 16 Oct 2024 13:16:34 -0500
> Terry Bowman <Terry.Bowman@amd.com> wrote:
>
>> Hi Jonathan,
>>
>> On 10/16/24 12:14, Jonathan Cameron wrote:
>>> On Tue, 8 Oct 2024 17:16:51 -0500
>>> Terry Bowman <terry.bowman@amd.com> wrote:
>>>
>>>> RAS registers are not mapped for CXL root ports, CXL downstream switch
>>>> ports, or CXL upstream switch ports. To prepare for future RAS logging
>>>> and handling, the driver needs updating to map PCIe port RAS registers.
>>>
>>> Give the upstream port is in next patch, I'd just mention that you
>>> are adding mapping of RP and DSP here (This confused me before I noticed
>>> the next patch).
>>
>> Ok. Good point,
>>
>>>>
>>>> Refactor and rename cxl_setup_parent_dport() to be cxl_init_ep_ports_aer().
>>>> Update the function such that it will iterate an endpoint's dports to map
>>>> the RAS registers.
>>>>
>>>> Rename cxl_dport_map_regs() to be cxl_dport_init_aer(). The new
>>>> function name is a more accurate description of the function's work.
>>>>
>>>> This update should also include checking for previously mapped registers
>>>> within the topology, particularly with CXL switches. Endpoints under a
>>>> CXL switch may share a common downstream and upstream port, ensure that
>>>> the registers are only mapped once.
>>>
>>> I don't understand why we need to do this for the ras registers but
>>> it doesn't apply for HDM decoders for instance? Why can't
>>> we map these registers in cxl_port_probe()?
>>>
>>
>> We have seen downstream root ports with DVSECs that are not fully populated
>> immediately after booting. The plan here was to push out the RAS register
>> block mapping until as late as possible, in the memdev driver.
>
> That needs debugging because simply pushing it later like this is
> only going to make the race harder to hit unless we understand the
> 'why' of that. If there is a reason to delay, my gut feeling would
> be to delay the cxl_port_probe() until things are stable rather
> than just trying this a bit later.
>
> This might be the whole link must train before CXL registers are
> presented thing (a less than ideal corner of the CXL spec) but not
> sure it would mean they weren't available in cxl_port_probe()
>
> Jonathan
>
>
>

My understanding is there is no spec defined expectation for when CXL
config registers are ready. We need Dan's feedback. He has asked several
times for this to be located after adding the endpoint in the memdev
driver.

Regards,
Terry
© 2016 - 2026 Red Hat, Inc.